source("../functions.R")
tol12qualitative=c("#332288", "#6699CC", "#88CCEE", "#44AA99", "#117733", "#999933", "#DDCC77", "#661100", "#CC6677", "#AA4466", "#882255", "#AA4499")
if (!file.exists("output")) {
system("mkdir output")
}
parallel = TRUE
ncores = 10
if (!require(DCARS)) {
library(devtools)
devtools::install_github("shazanfar/DCARS")
}
library(DCARS)
library(ggplot2)
library(gplots)
library(SingleCellExperiment)
library(scater)
library(scran)
library(reshape)
library(scattermore)
library(dynamicTreeCut)
library(ComplexHeatmap)
library(GO.db)
library(org.Mm.eg.db)
library(stringr)
library(matrixStats)
library(ggforce)
library(patchwork)
library(ggpubr)
library(cowplot)
library(parallel)
library(monocle)
library(ggthemes)
library(igraph)
library(gtools)
library(plotly)
library(ggrepel)
library(ggridges)
library(M3Drop)
library(scMerge)
library(corrplot)
library(circlize)
library(GGally)
library(corrplot)
library(UpSetR)
library(minerva)
library(energy)
if (!file.exists("output/liver_pseudotime.Rds")) {
# data webpage https://sydneybiox.github.io/scMerge/articles/Mouse_Liver_Data/Mouse_Liver_Data.html
# data file URL http://www.maths.usyd.edu.au/u/yingxinl/wwwnb/scMergeData/liver_scMerge.rds
# might take a minute to download
liver_scMerge = readRDS(url("http://www.maths.usyd.edu.au/u/yingxinl/wwwnb/scMergeData/liver_scMerge.rds"))
dim(assay(liver_scMerge,"scMerge"))
assay(liver_scMerge,"scMerge")[1:5,1:5]
table(liver_scMerge$cellTypes, liver_scMerge$batch)
hepa_cellTypes = c("hepatoblast/hepatocyte","cholangiocyte")
liver_scMerge$isHepa = ifelse(liver_scMerge$cellTypes %in% hepa_cellTypes, "Hepa", "Other")
liver_scMerge <- runPCA(liver_scMerge, exprs_values = "scMerge", ncomponents = 50)
set.seed(484)
liver_scMerge <- runTSNE(liver_scMerge, use_dimred = "PCA")
tsne_hepa = reducedDim(liver_scMerge, "TSNE")[liver_scMerge$isHepa == "Hepa",]
bounding_df = data.frame(
x = 1.1*range(tsne_hepa[,1])[c(1,2,2,1,1)],
y = 1.1*range(tsne_hepa[,2])[c(1,1,2,2,1)]
)
tsne_df = as.data.frame(cbind(
colData(liver_scMerge),
reducedDim(liver_scMerge, "TSNE")[,1:2]
))
g = ggplot(tsne_df, aes(x = V1, y = V2, colour = cellTypes)) +
geom_point(size = 2.5) +
theme_classic() +
theme(legend.position = "bottom") +
xlab("TSNE 1") +
ylab("TSNE 2") +
theme(axis.title = element_text(size = 30)) +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank()) +
theme(legend.text = element_text(size = 10)) +
scale_colour_gdocs() +
geom_path(data = bounding_df, aes(x = x, y = y),
linetype = "dashed", colour = "red",
size = 1) +
guides(colour=guide_legend(nrow=3,byrow=TRUE)) +
NULL
g
ggsave(g, file = "output/UMAP_liver_scMerge.pdf", height = 7.5, width = 7)
liver_scMerge$Estage = regmatches(colnames(liver_scMerge), regexpr("E[0-9][0-9].[0-9]|E[0-9].[0-9]", colnames(liver_scMerge)))
table(liver_scMerge$Estage, liver_scMerge$isHepa, liver_scMerge$batch)
tfac = list(liver_scMerge$Estage, liver_scMerge$batch)
liver_scMerge$width = unsplit(tapply(rep(1,ncol(liver_scMerge)), tfac, sum), tfac)
liver_scMerge$width_transformed = sqrt(liver_scMerge$width)
liver_scMerge$isHepaSwitch = factor(liver_scMerge$isHepa, levels = c("Other","Hepa"))
liver_scMerge$Estage = factor(liver_scMerge$Estage, levels = gtools::mixedsort(unique(liver_scMerge$Estage)))
liver_scMerge2 = liver_scMerge[,liver_scMerge$batch %in% subset(colData(liver_scMerge), isHepa == "Hepa")$batch]
g = ggplot(as.data.frame(colData(liver_scMerge2)),
aes(y = 1, fill = isHepaSwitch)) +
geom_bar(aes(width = width_transformed, x = 0 + width_transformed/2),
stat = "identity", position = "fill") +
coord_polar("y", start = 0) +
facet_grid(Estage ~ batch, switch = "y") +
theme_minimal() +
theme(strip.text.y = element_text(angle = 180)) +
theme(strip.text.x = element_text(angle = 90)) +
theme(panel.grid = element_blank()) +
theme(axis.text = element_blank()) +
theme(strip.placement = "inside") +
theme(legend.position = "bottom") +
theme(legend.direction = "vertical") +
guides(fill = guide_legend(reverse = TRUE)) +
labs(fill = "") +
scale_fill_manual(values = c("Hepa" = "red", "Other" = "black"),
labels = c("Hepa" = "Hepatic cells", "Other" = "Other cells")) +
theme(strip.text.y = element_text(size = 13)) +
theme(strip.text.x = element_text(size = 15)) +
theme(legend.text = element_text(size = 20)) +
xlab("") +
ylab("") +
NULL
g
ggsave(g, file = "output/hepatic_cells_piechart.pdf", height = 8, width = 6)
table(liver_scMerge$cellTypes,liver_scMerge$batch)
ggplot(melt(table(liver_scMerge$cellTypes,liver_scMerge$batch)),
aes(x = Var.2, group = Var.1, y = value, fill = Var.1)) + geom_bar(stat = "identity") +
theme_minimal() +
scale_fill_brewer(palette = "Spectral")
ggplot(melt(table(liver_scMerge$cellTypes,liver_scMerge$batch)),
aes(x = Var.1, group = Var.2, y = value, fill = Var.2)) + geom_bar(stat = "identity") +
theme_minimal() +
scale_fill_brewer(palette = "RdBu")
hepa_scMerge = liver_scMerge[,liver_scMerge$isHepa == "Hepa"]
var.fit <- trendVar(hepa_scMerge, assay.type = "scMerge", parametric=TRUE, loess.args=list(span=0.3), use.spikes = FALSE)
var.out <- decomposeVar(hepa_scMerge, var.fit, assay.type = "scMerge")
pdf(file = "output/HVG_selection.pdf", height = 8, width = 8)
plot(var.out$mean, var.out$total, pch=16, cex=0.6, xlab="Mean log-expression",
ylab="Variance of log-expression")
curve(var.fit$trend(x), col="dodgerblue", lwd=2, add=TRUE)
seqvals = seq(min(var.out$mean), max(var.out$mean), length.out = 1000)
peakExp = seqvals[which.max(var.fit$trend(seqvals))]
hvg.out <- var.out[which(var.out$FDR <= 0.05 & var.out$mean > peakExp),]
nrow(hvg.out)
hvg.out <- hvg.out[order(hvg.out$bio, decreasing=TRUE),]
points(var.out$mean[which(var.out$FDR <= 0.05 & var.out$mean > peakExp)],
var.out$total[which(var.out$FDR <= 0.05 & var.out$mean > peakExp)], col="red", pch=16)
abline(v = peakExp, lty = 2, col = "black")
dev.off()
head(hvg.out)
HVG = sort(rownames(hvg.out))
length(HVG)
geneNames <- data.frame(gene_short_name=rownames(hepa_scMerge))
rownames(geneNames) <- rownames(hepa_scMerge)
fd <- new("AnnotatedDataFrame", data = geneNames)
pd <- data.frame(cellType = hepa_scMerge$cellTypes,batch = hepa_scMerge$batch)
rownames(pd) <- colnames(hepa_scMerge)
pd <- new("AnnotatedDataFrame", data = pd)
exprMat <- assay(hepa_scMerge, "scMerge")
liver <- newCellDataSet(exprMat, expressionFamily = uninormal(), phenoData = pd, featureData=fd)
liver <- estimateSizeFactors(liver)
liver <- setOrderingFilter(liver,HVG)
liver <- reduceDimension(liver, max_components=2, method = "DDRTree",norm_method ="none")
liver <- orderCells(liver)
table(pData(liver)$cellType,pData(liver)$State)
# Ahsg gene is meant to be lowly expressed in hepatoblasts and then highly expressed in hepatocytes
# thus we can tell that the lower branch is the root state
root_state = levels(pData(liver)$State)[which.min(tapply(exprMat["Ahsg",],pData(liver)$State, mean))]
chol_state = levels(pData(liver)$State)[which.max(tapply(exprMat["Epcam",],pData(liver)$State, mean))]
hep_state = setdiff(levels(pData(liver)$State), c(root_state, chol_state))
states = c("Hepatoblast","Cholangiocyte","Hepatocyte")
names(states) = c(root_state, chol_state, hep_state)
g1 = plot_cell_trajectory(liver, color_by="State", cell_size = 3, x = 2, y = 1) +
scale_y_reverse() +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank()) +
theme(axis.title = element_text(size = 20)) +
theme(legend.position = "bottom") +
scale_color_tableau(labels = states) +
theme(legend.text = element_text(size = 17)) +
labs(color = "") +
guides(color = guide_legend(reverse = TRUE)) +
NULL
g1
ggsave(g1, file = "output/hepa_monocle.pdf",height = 6, width = 6)
# add bounding boxes to the monocle graphs
liver$nameStates = states[as.character(liver$State)]
dimred_liver = t(liver@reducedDimS)
expandRange = function(range, diff = 0.5){
return(c(range[1]-diff, range[2] + diff))
}
bounding_df_liver_first = data.frame(
x = expandRange(range(dimred_liver[liver$nameStates %in% c("Hepatoblast"),1]))[c(1,2,2,1,1)],
y = expandRange(range(dimred_liver[liver$nameStates %in% c("Hepatoblast"),2]), diff = 0.5)[c(1,1,2,2,1)]
)
g11 = g1 + geom_path(data = bounding_df_liver_first, aes(x = y, y = x),
linetype = "dashed", colour = "red",
size = 1) +
NULL
g11
ggsave(g11, file = "output/hepa_monocle_box_first.pdf", height = 6, width = 6)
bounding_df_liver_hep = data.frame(
x = expandRange(range(dimred_liver[liver$nameStates %in% c("Hepatoblast", "Hepatocyte"),1]))[c(1,2,2,1,1)],
y = expandRange(range(dimred_liver[liver$nameStates %in% c("Hepatoblast", "Hepatocyte"),2]), diff = 0.5)[c(1,1,2,2,1)]
)
g12 = g1 + geom_path(data = bounding_df_liver_hep, aes(x = y, y = x),
linetype = "dashed", colour = "red",
size = 1) +
NULL
g12
ggsave(g12, file = "output/hepa_monocle_box_hep.pdf", height = 6, width = 6)
# highlight where the 5 points are
pointvals = round(seq(1,sum(liver$nameStates %in% c("Hepatoblast", "Hepatocyte")), length.out = 5))
ps = liver$Pseudotime
names(ps) <- colnames(liver)
ps <- ps[liver$nameStates %in% c("Hepatoblast", "Hepatocyte")]
pointnames = names(sort(ps))[pointvals]
ps_df = pData(liver)[pointnames,]
ps_df$Component1 = t(liver@reducedDimS[,pointnames])[,1]
ps_df$Component2 = t(liver@reducedDimS[,pointnames])[,2]
g12plus = g12 + geom_point(data = ps_df, aes(x = Component2, y = Component1),
inherit.aes = FALSE, size = 3)
g12plus
ggsave(g12plus, file = "output/hepa_monocle_box_hep_pointdots.pdf", height = 6, width = 6)
bounding_df_liver_chol = data.frame(
x = expandRange(range(dimred_liver[liver$nameStates %in% c("Hepatoblast", "Cholangiocyte"),1]))[c(1,2,2,1,1)],
y = expandRange(range(dimred_liver[liver$nameStates %in% c("Hepatoblast", "Cholangiocyte"),2]), diff = 0.5)[c(1,1,2,2,1)]
)
g13 = g1 + geom_path(data = bounding_df_liver_chol, aes(x = y, y = x),
linetype = "dashed", colour = "red",
size = 1) +
NULL
g13
ggsave(g13, file = "output/hepa_monocle_box_chol.pdf", height = 6, width = 6)
g2 = plot_cell_trajectory(liver, color_by="batch")
g3 = plot_cell_trajectory(liver, color_by="cellType")
plottingmarkergenes = c("Gapdh","Sall4", "Epcam","Ahsg")
sapply(plottingmarkergenes, function(mark) {
g4 = plot_cell_trajectory(liver, markers=mark,
use_color_gradient = TRUE,
begin = 0, end = 1,
markers_linear = TRUE,
cell_size = 3,
x = 2, y = 1) +
scale_y_reverse() +
xlab("") + ylab("") +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank()) +
theme(axis.title = element_text(size = 20)) +
theme(strip.text = element_text(size = 30, face = "italic")) +
theme(legend.position = "bottom") +
theme(legend.text = element_text(size = 17)) +
theme(legend.key.width = unit(1, "inches")) +
theme(legend.key.height = unit(0.1, "inches")) +
scale_color_viridis_c(breaks = c(0,10),
limits = c(0,10),
labels = c("Low","High")) +
scale_color_viridis_c() +
labs(color = "Expression") +
xlab(ifelse(mark == "Gapdh", "Expression", " ")) +
xlab("") +
guides(color = guide_colourbar(title.position = "top",
title.hjust = 0.5)) +
theme(legend.position = ifelse(mark == "Gapdh", "bottom", "none")) +
theme(legend.title = element_text(size = 20)) +
NULL
g4
if (mark == "Gapdh") {
g4 = g4 + scale_color_viridis_c(breaks = c(0,13),
limits = c(0,13),
labels = c("Low","High")) +
NULL
gg4 = as_ggplot(get_legend(g4))
g4 = gg4
}
ggsave(g4, file = paste0("output/hepa_monocle_", mark, ".pdf"),height = 6, width = 6)
})
liver$Estage = regmatches(colnames(liver), regexpr("E[0-9][0-9].[0-9]", colnames(liver)))
g6 = plot_cell_trajectory(liver, color_by="Estage", cell_size = 3,
x = 2, y = 1) +
scale_y_reverse() +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank()) +
theme(axis.title = element_text(size = 20)) +
theme(legend.position = "top") +
scale_colour_brewer(palette = "Oranges") +
xlab("") + ylab("") +
theme(legend.text = element_text(size = 20)) +
labs(color = "") +
NULL
g6
ggsave(g6, file = "output/hepa_monocle_Estage.pdf",height = 6, width = 6)
g5 = plot_cell_trajectory(liver, markers="Epcam",
use_color_gradient = TRUE,
begin = 0, end = 1,
markers_linear = TRUE)
pData(liver)$TrajectoryState = states[as.character(pData(liver)$State)]
liver <- orderCells(liver,root_state = root_state)
liver
saveRDS(root_state, file = "output/root_state.Rds")
saveRDS(HVG, file = "output/HVG.Rds")
saveRDS(liver, file = "output/liver_pseudotime.Rds")
} else {
liver = readRDS("output/liver_pseudotime.Rds")
HVG = readRDS("output/HVG.Rds")
root_state = readRDS("output/root_state.Rds")
}
if (!file.exists("output/liver_branch_data.RData")) {
head(pData(liver))
plot(pData(liver)$Pseudotime, pData(liver)$State)
# expressed_genes
# require at least 20% of cells to express at least 3
hist(apply(liver,1,quantile, 0.9))
liver_hepa_genes = sort(union(rownames(liver)[apply(liver,1,quantile, 0.8) > 5], HVG))
length(liver_hepa_genes)
liver_expressed = liver[liver_hepa_genes,]
# order cells based on pseudotime
# decided what are the values based on manual checking of the States
# manu check shows state 3 is stem, state 2 is hep, state 1 is chol
branch_cells_hep = pData(liver_expressed)[pData(liver_expressed)$State %in% c(root_state, hep_state),]
branch_cells_chol = pData(liver_expressed)[pData(liver_expressed)$State %in% c(root_state, chol_state),]
# obtain the name of cells that represents the ranking
branch_cells_order_hep = rownames(sort_df(branch_cells_hep,"Pseudotime"))
branch_cells_order_chol = rownames(sort_df(branch_cells_chol,"Pseudotime"))
# Generate the gene expression for the hep and chol branches
liver_branch_hep = exprs(liver_expressed[,branch_cells_order_hep])
liver_branch_chol = exprs(liver_expressed[,branch_cells_order_chol])
# retain the pseudotime estimates
liver_pseudotime = liver$Pseudotime
names(liver_pseudotime) <- colnames(liver)
liver_pseudotime_hep = liver_pseudotime[colnames(liver_branch_hep)]
liver_pseudotime_chol = liver_pseudotime[colnames(liver_branch_chol)]
save(liver_pseudotime_hep, liver_pseudotime_chol,
liver_branch_hep, liver_branch_chol,
file = "output/liver_branch_data.RData")
} else {
load("output/liver_branch_data.RData")
}
if (!file.exists("output/DE_liver.RData")) {
DE_hep_raw = apply(liver_branch_hep,1,function(x){
print("testing")
fit3 = lm(x ~ liver_pseudotime_hep + I(liver_pseudotime_hep^2))
fit2 = lm(x ~ liver_pseudotime_hep)
fit1 = lm(x ~ 1)
f_21 = anova(fit2,fit1)$`Pr(>F)`[2]
f_32 = anova(fit3,fit2)$`Pr(>F)`[2]
return(min(c(1, nrow(liver_branch_hep)*f_21, nrow(liver_branch_hep)*f_32)))
} )
DE_chol_raw = apply(liver_branch_chol,1,function(x){
print("testing")
fit3 = lm(x ~ liver_pseudotime_chol + I(liver_pseudotime_chol^2))
fit2 = lm(x ~ liver_pseudotime_chol)
fit1 = lm(x ~ 1)
f_21 = anova(fit2,fit1)$`Pr(>F)`[2]
f_32 = anova(fit3,fit2)$`Pr(>F)`[2]
return(min(c(1, nrow(liver_branch_chol)*f_21, nrow(liver_branch_chol)*f_32)))
} )
nonDE_HVG_hep = intersect(HVG, names(which(DE_hep_raw > 0.05)))
nonDE_HVG_chol = intersect(HVG, names(which(DE_chol_raw > 0.05)))
save(DE_hep_raw, DE_chol_raw, nonDE_HVG_hep, nonDE_HVG_chol, file = "output/DE_liver.RData")
} else {
load("output/DE_liver.RData")
}
weightedVarMatrixStats = function(x, y, w) {
require(matrixStats)
weightedVar(x,w)
}
n_first = length(intersect(colnames(liver_branch_chol), colnames(liver_branch_hep)))
W_first = weightMatrix(n_first, span = 0.5)
W_chol = weightMatrix(ncol(liver_branch_chol), span = 0.5*n_first/ncol(liver_branch_chol))
W_hep = weightMatrix(ncol(liver_branch_hep), span = 0.5*n_first/ncol(liver_branch_hep))
if (!file.exists("output/DV_hep.RData")) {
set.seed(500)
same_pairs_hep = cbind(nonDE_HVG_hep, nonDE_HVG_hep)
hep_DV_stats = DCARSacrossNetwork(liver_branch_hep,
edgelist = same_pairs_hep,
W = W_hep,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractTestStatisticOnly = TRUE)
names(hep_DV_stats) <- nonDE_HVG_hep
hep_DV_wVars = t(DCARSacrossNetwork(liver_branch_hep,
edgelist = same_pairs_hep,
W = W_hep,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractWcorSequenceOnly = TRUE))
rownames(hep_DV_wVars) <- nonDE_HVG_hep
hep_DV_permstats_raw = DCARSacrossNetwork(liver_branch_hep,
edgelist = same_pairs_hep,
W = W_hep,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractPermutationTestStatistics = TRUE,
niter = 500)
hep_DV_permstats = lapply(hep_DV_permstats_raw, unlist)
names(hep_DV_permstats) <- nonDE_HVG_hep
hep_DV_pvals = sapply(1:length(hep_DV_stats), function(i) {
mean(hep_DV_permstats[[i]] >= hep_DV_stats[i])
})
names(hep_DV_pvals) <- nonDE_HVG_hep
hep_DV_fdr = p.adjust(hep_DV_pvals, method = "BH")
save(W_hep, same_pairs_hep, hep_DV_stats, hep_DV_wVars, hep_DV_permstats, hep_DV_pvals, hep_DV_fdr,
file = "output/DV_hep.RData")
} else {
load("output/DV_hep.RData")
}
if (!file.exists("output/DV_chol.RData")) {
set.seed(500)
same_pairs_chol = cbind(nonDE_HVG_chol, nonDE_HVG_chol)
chol_DV_stats = DCARSacrossNetwork(liver_branch_chol,
edgelist = same_pairs_chol,
W = W_chol,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractTestStatisticOnly = TRUE)
names(chol_DV_stats) <- nonDE_HVG_chol
chol_DV_wVars = t(DCARSacrossNetwork(liver_branch_chol,
edgelist = same_pairs_chol,
W = W_chol,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractWcorSequenceOnly = TRUE))
rownames(chol_DV_wVars) <- nonDE_HVG_chol
chol_DV_permstats_raw = DCARSacrossNetwork(liver_branch_chol,
edgelist = same_pairs_chol,
W = W_chol,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractPermutationTestStatistics = TRUE,
niter = 500)
chol_DV_permstats = lapply(chol_DV_permstats_raw, unlist)
names(chol_DV_permstats) <- nonDE_HVG_chol
chol_DV_pvals = sapply(1:length(chol_DV_stats), function(i) {
mean(chol_DV_permstats[[i]] >= chol_DV_stats[i])
})
names(chol_DV_pvals) <- nonDE_HVG_chol
chol_DV_fdr = p.adjust(chol_DV_pvals, method = "BH")
save(W_chol, same_pairs_chol, chol_DV_stats, chol_DV_wVars, chol_DV_permstats, chol_DV_pvals, chol_DV_fdr,
file = "output/DV_chol.RData")
} else {
load("output/DV_chol.RData")
}
if (!file.exists("output/first_data.RData")) {
first = exprs(liver[,intersect(colnames(liver_branch_hep), colnames(liver_branch_chol))])
first_pseudotime = liver_pseudotime_chol[intersect(colnames(liver_branch_hep), colnames(liver_branch_chol))]
var.fit <- trendVar(first, parametric = FALSE, loess.args=list(span=0.3), min.mean = 1.5, method = "loess")
var.out <- decomposeVar(first, var.fit)
plot(var.out$mean, var.out$total, pch=16, cex=0.6, xlab="Mean log-expression",
ylab="Variance of log-expression")
curve(var.fit$trend(x), col="dodgerblue", lwd=2, add=TRUE)
seqvals = seq(min(var.out$mean), max(var.out$mean), length.out = 1000)
peakExp = seqvals[which.max(var.fit$trend(seqvals))]
hvg.out <- var.out[which(var.out$FDR <= 0.05 & var.out$mean > peakExp),]
nrow(hvg.out)
hvg.out <- hvg.out[order(hvg.out$bio, decreasing=TRUE),]
points(var.out$mean[which(var.out$FDR <= 0.05 & var.out$mean > peakExp)],
var.out$total[which(var.out$FDR <= 0.05 & var.out$mean > peakExp)], col="red", pch=16)
abline(v = peakExp, lty = 2, col = "black")
head(hvg.out)
HVG_first = sort(rownames(hvg.out))
DE_first_raw = apply(first,1,function(x){
print("testing")
fit3 = lm(x ~ first_pseudotime + I(first_pseudotime^2))
fit2 = lm(x ~ first_pseudotime)
fit1 = lm(x ~ 1)
f_21 = anova(fit2,fit1)$`Pr(>F)`[2]
f_32 = anova(fit3,fit2)$`Pr(>F)`[2]
return(min(c(1, nrow(first)*f_21, nrow(first)*f_32)))
} )
nonDE_HVG_first = intersect(HVG_first, names(which(DE_first_raw > 0.05)))
save(first, first_pseudotime, HVG_first, nonDE_HVG_first, DE_first_raw, file = "output/first_data.RData")
} else {
load("output/first_data.RData")
}
if (!file.exists("output/first_DV.RData")) {
set.seed(500)
W_first = weightMatrix(ncol(first), span = 0.5)
same_pairs_first = cbind(nonDE_HVG_first, nonDE_HVG_first)
rownames(same_pairs_first) <- nonDE_HVG_first
first_DV_stats = DCARSacrossNetwork(first,
edgelist = same_pairs_first,
W = W_first,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractTestStatisticOnly = TRUE)
names(first_DV_stats) <- nonDE_HVG_first
first_DV_wVars = t(DCARSacrossNetwork(first,
edgelist = same_pairs_first,
W = W_first,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractWcorSequenceOnly = TRUE))
rownames(first_DV_wVars) <- nonDE_HVG_first
if (parallel) {
split_df = split.data.frame(same_pairs_first, rep(1:ncores, length.out = nrow(same_pairs_first)))
names(split_df) <- NULL
first_DV_permstats_raw = mclapply(split_df, function(s) {
res = DCARSacrossNetwork(first,
edgelist = s,
W = W_first,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractPermutationTestStatistics = TRUE,
niter = 1000)
res = lapply(res, unlist)
names(res) <- rownames(s)
return(res)
}, mc.cores = ncores)
first_DV_permstats = unlist(first_DV_permstats_raw, recursive = FALSE)
first_DV_permstats = first_DV_permstats[rownames(same_pairs_first)]
} else {
first_DV_permstats_raw = DCARSacrossNetwork(first,
edgelist = same_pairs_first,
W = W_first,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractPermutationTestStatistics = TRUE,
niter = 1000)
first_DV_permstats = lapply(first_DV_permstats_raw, unlist)
names(first_DV_permstats) <- nonDE_HVG_first
}
first_DV_pvals = sapply(1:length(first_DV_stats), function(i){
mean(first_DV_permstats[[i]] >= first_DV_stats[i])
})
names(first_DV_pvals) <- nonDE_HVG_first
first_DV_fdr = p.adjust(first_DV_pvals, method = "BH")
save(W_first, same_pairs_first, first_DV_stats, first_DV_wVars, first_DV_permstats, first_DV_pvals, first_DV_fdr, file = "output/first_DV.RData")
} else {
load("output/first_DV.RData")
}
DV_FDR_level = 0.1
first_sig = names(which(sort(first_DV_fdr)<DV_FDR_level))
dim(first_sig)
## NULL
matplot(t(first_DV_wVars[first_sig, ]), type = "l")
# changes all appear monotonic
# class genes into gain or loss or variability
sig_class = ifelse(first_DV_wVars[first_sig, ncol(first_DV_wVars)] > first_DV_wVars[first_sig, 1],
"gain","loss")
table(sig_class)
## sig_class
## gain loss
## 58 10
sort(sig_class)
## Asf1b Birc5 Cadm1 Ccnb2 Cdca3 Eno1 H2afz Hmgb1
## "gain" "gain" "gain" "gain" "gain" "gain" "gain" "gain"
## Hmgb2 Hprt Mad2l1 Mat2a Mthfd2 Nasp Nmt2 Pfkl
## "gain" "gain" "gain" "gain" "gain" "gain" "gain" "gain"
## Sfpq Snhg4 Trim59 Ybx1 Cdca8 Maged1 Sypl Ccna2
## "gain" "gain" "gain" "gain" "gain" "gain" "gain" "gain"
## Metap2 Pnrc2 Rpa1 Smc2 Fubp1 Mcm3 Mier3 Pola2
## "gain" "gain" "gain" "gain" "gain" "gain" "gain" "gain"
## Ptcd3 Serpinf2 Vtn Clk1 Trp53 Hmga2 Tacc3 Tpi1
## "gain" "gain" "gain" "gain" "gain" "gain" "gain" "gain"
## Zfp260 Ect2 Haus3 Hsph1 Nop56 Wrb Cenpa Nap1l1
## "gain" "gain" "gain" "gain" "gain" "gain" "gain" "gain"
## Rfc3 Spcs3 Ncapd2 Rcn3 Shcbp1 Lect2 Zfp871 Tnfaip8
## "gain" "gain" "gain" "gain" "gain" "gain" "gain" "gain"
## Cd63 Aurka Creb3l3 Dmgdh Hmgcs2 Gimd1 Rpl3-ps1 Tmem37
## "gain" "gain" "loss" "loss" "loss" "loss" "loss" "loss"
## Insr G0s2 Nab1 Fbxo8
## "loss" "loss" "loss" "loss"
matplot(t(first_DV_wVars[names(which(sig_class == "gain")), ]), type = "l",
col = "black", lty = 1)
matplot(t(first_DV_wVars[names(which(sig_class == "loss")), ]), type = "l",
col = "black", lty = 1)
genesOfInterest = c("Birc5","H2afz","Tacc3", "Hmgcs2")
gList = sapply(genesOfInterest, function(i) {
require(patchwork)
df = data.frame(
rank = 1:ncol(first),
pseudotime = first_pseudotime,
expr = first[i,],
mean = apply(W_first, 1, function(w) weightedMean(first[i,], w)),
var = first_DV_wVars[i,],
sd = sqrt(first_DV_wVars[i,])
)
df$lower = df$mean - df$sd
df$upper = df$mean + df$sd
g1 = ggplot(df, aes(x = pseudotime, y = expr)) +
geom_point() +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3, colour = NA) +
geom_line(aes(y = mean)) +
theme_minimal() +
theme(legend.position = "none") +
theme(panel.grid = element_blank()) +
ggtitle(ifelse(is.numeric(i),nonDE_HVG_first[i], i)) +
theme(axis.title = element_text(size = 15)) +
theme(plot.title = element_text(face = "italic")) +
theme(axis.line.x = element_line(color="black", size = 0.5),
axis.line.y = element_line(color="black", size = 0.5)) +
xlab(ifelse(i == genesOfInterest[1],"Pseudotime","")) +
ylab(ifelse(i == genesOfInterest[1],"Expression","")) +
NULL
g2 = DCARS::plotOrderedExpression(list(Hepatocye = liver_branch_hep, Cholangiocyte = liver_branch_chol),
gene = i,
xvals = list(Hepatocye = liver_pseudotime_hep, Cholangiocyte = liver_pseudotime_chol)) +
xlab("Pseudotime") +
ylab("Expression") +
theme(panel.grid = element_blank()) +
facet_grid(branch~.) +
theme(legend.position = "none") +
geom_vline(xintercept = max(first_pseudotime), linetype = "dashed", colour = "darkgrey", size = 2) +
NULL
g1 + g2 + plot_layout(ncol = 1, heights = c(1,2))
return(g1)
}, simplify = FALSE)
g_first_DV_examples = wrap_plots(gList, nrow = 1)
g_first_DV_examples
ggsave(g_first_DV_examples, file = "output/first_DV_gain_ribbon_examples.pdf",
height = 3, width = 10)
df_gain_raw = sapply(names(which(sig_class == "gain")), function(i){
df = data.frame(
rank = 1:ncol(first),
pseudotime = first_pseudotime,
expr = first[i,],
mean = apply(W_first, 1, function(w) weightedMean(first[i,], w)),
var = first_DV_wVars[i,],
sd = sqrt(first_DV_wVars[i,])
)
df$lower = df$mean - df$sd
df$upper = df$mean + df$sd
df$gene = i
return(df)
}, simplify = FALSE)
df_gain = do.call(rbind, df_gain_raw)
g = ggplot(df_gain, aes(x = pseudotime, y = expr, group = gene)) +
geom_point(alpha = 0.5, size = 0.5) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3, colour = NA) +
theme_classic() +
facet_wrap(gene~., scales = "free") +
theme(legend.position = "none") +
theme(panel.grid = element_blank()) +
xlab("Pseudotime") +
ylab("Expression") +
theme(axis.line.x = element_line(color="black", size = 0.5),
axis.line.y = element_line(color="black", size = 0.5)) +
theme(strip.text = element_text(face = "italic")) +
theme(axis.text.x = element_text(size = 1)) +
theme(axis.ticks.x = element_blank()) +
ggtitle("Genes gaining variability in first branch of hepatoblasts") +
NULL
g
ggsave(g, file = "output/first_DV_gain_ribbon.pdf", height = 8, width = 10)
df_loss_raw = sapply(names(which(sig_class == "loss")), function(i){
df = data.frame(
rank = 1:ncol(first),
pseudotime = first_pseudotime,
expr = first[i,],
mean = apply(W_first, 1, function(w) weightedMean(first[i,], w)),
var = first_DV_wVars[i,],
sd = sqrt(first_DV_wVars[i,])
)
df$lower = df$mean - df$sd
df$upper = df$mean + df$sd
df$gene = i
return(df)
}, simplify = FALSE)
df_loss = do.call(rbind, df_loss_raw)
g = ggplot(df_loss, aes(x = pseudotime, y = expr, group = gene)) +
geom_point(alpha = 0.5) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3, colour = NA) +
theme_classic() +
facet_wrap(gene~., scales = "free", nrow = 2) +
theme(legend.position = "none") +
theme(panel.grid = element_blank()) +
theme(strip.text = element_text(face = "italic")) +
theme(axis.text.x = element_text(size = 1)) +
theme(axis.ticks.x = element_blank()) +
xlab("Pseudotime") +
ylab("Expression") +
ggtitle("Genes losing variability in first branch of hepatoblasts") +
NULL
g
ggsave(g, file = "output/first_DV_loss_ribbon.pdf", height = 5, width = 10)
Print this out for significantly differentially variable genes.
first_DV_all = data.frame(
gene = names(first_DV_stats),
stat = first_DV_stats,
# globalVar = ,
pval = first_DV_pvals,
fdr = first_DV_fdr,
sig_class = sig_class[names(first_DV_stats)],
startVar = first_DV_wVars[,1],
endVar = first_DV_wVars[,ncol(first_DV_wVars)]
)
first_DV_all_sorted = reshape::sort_df(first_DV_all, "fdr")
write.table(first_DV_all_sorted, file = "output/first_DV_all_sorted.tsv", sep = "\t",
row.names = FALSE, col.names = TRUE, quote = FALSE)
write.table(subset(first_DV_all_sorted, fdr < DV_FDR_level), file = "output/first_DV_all_sorted_sig.tsv", sep = "\t",
row.names = FALSE, col.names = TRUE, quote = FALSE)
# Divide data into half/half and perform a normal-distribution based
# F-test, and compare against scHOT testing
var.test.split = 0.5
var.test.split.inds = 1:ncol(first) %in% c(1:floor(var.test.split*ncol(first)))
# var.test.i = 1
var.test.pvals = sapply(1:length(first_DV_pvals), function(var.test.i) {
var.test(first[names(first_DV_pvals)[var.test.i],var.test.split.inds],
first[names(first_DV_pvals)[var.test.i],!var.test.split.inds])$p.value
})
names(var.test.pvals) <- names(first_DV_pvals)
var.test.fdr = p.adjust(var.test.pvals, method = "BH")
table(var.test.fdr < DV_FDR_level)
##
## FALSE TRUE
## 281 219
table(first_DV_fdr < DV_FDR_level)
##
## FALSE TRUE
## 432 68
addmargins(table(var.test.fdr < DV_FDR_level, first_DV_fdr < DV_FDR_level))
##
## FALSE TRUE Sum
## FALSE 279 2 281
## TRUE 153 66 219
## Sum 432 68 500
plot(-log10(first_DV_pvals + 0.5*min(first_DV_pvals[first_DV_pvals != 0])), -log10(var.test.pvals), type = "n")
text(-log10(first_DV_pvals + 0.5*min(first_DV_pvals[first_DV_pvals != 0])), -log10(var.test.pvals), labels = names(first_DV_pvals))
abline(c(0,1), lty = 2)
topNot = names(sort(var.test.fdr[first_DV_fdr >= DV_FDR_level]))[1:20]
# outlierGenes = c("Tceal8", "Rian", "Rsl1d1", "Mttp", "Meg3", "Nab1", "Shcbp1")
gList = sapply(topNot, function(gene) {
qplot(1:ncol(first), first[gene,]) +
xlab("Pseudotime Ranking") +
ylab("Expression") +
ggtitle(gene) +
theme_classic() +
theme(plot.title = element_text(face = "italic")) +
geom_vline(xintercept = max(which(var.test.split.inds)),linetype = "dashed") +
# geom_text(aes(x = x, y = y, label = text),
# data = data.frame(x = max(which(var.test.split.inds)),
# y = 0,
# text = "Trajectory \n middle point"),
# inherit.aes = FALSE) +
NULL
}, simplify = FALSE)
g = wrap_plots(gList)
g
ggsave(g, file = "output/first_DV_topNot.pdf",height = 10, width = 14)
# a number of genes are driven by outlier expression (e.g. Tceal8, Rsl1d1)
# intersection
names(var.test.fdr)[var.test.fdr < DV_FDR_level & first_DV_fdr < DV_FDR_level]
## [1] "Asf1b" "Aurka" "Birc5" "Cadm1" "Ccna2" "Ccnb2"
## [7] "Cd63" "Cdca3" "Cdca8" "Cenpa" "Clk1" "Creb3l3"
## [13] "Dmgdh" "Ect2" "Eno1" "Fbxo8" "Fubp1" "G0s2"
## [19] "Gimd1" "H2afz" "Haus3" "Hmga2" "Hmgb1" "Hmgb2"
## [25] "Hmgcs2" "Hprt" "Hsph1" "Insr" "Lect2" "Mad2l1"
## [31] "Maged1" "Mat2a" "Mcm3" "Metap2" "Mier3" "Mthfd2"
## [37] "Nap1l1" "Nasp" "Ncapd2" "Nmt2" "Nop56" "Pfkl"
## [43] "Pnrc2" "Pola2" "Ptcd3" "Rcn3" "Rfc3" "Rpa1"
## [49] "Rpl3-ps1" "Serpinf2" "Sfpq" "Smc2" "Snhg4" "Spcs3"
## [55] "Sypl" "Tacc3" "Tmem37" "Tnfaip8" "Tpi1" "Trim59"
## [61] "Trp53" "Vtn" "Wrb" "Ybx1" "Zfp260" "Zfp871"
# scHOT but not variance
names(var.test.fdr)[var.test.fdr >= DV_FDR_level & first_DV_fdr < DV_FDR_level]
## [1] "Nab1" "Shcbp1"
# variance but not scHOT
names(var.test.fdr)[var.test.fdr < DV_FDR_level & first_DV_fdr >= DV_FDR_level]
## [1] "4933434E20Rik" "Aacs" "Aass" "Abcg2"
## [5] "Actb" "Actg1" "Ado" "Afm"
## [9] "Afp" "Ambp" "Apoa2" "Apom"
## [13] "Arhgap5" "Asah1" "Ass1" "Atad2"
## [17] "Atp6v1b2" "Atp7a" "Atrx" "Bckdk"
## [21] "Bex1" "Bex2" "Bex4" "Bsg"
## [25] "C1s1" "Calm1" "Casp8ap2" "Ccdc50"
## [29] "Ccnb1" "Ccnd1" "Cd59a" "Cdc45"
## [33] "Cdca7" "Ckap5" "Cks1b" "Cltc"
## [37] "Cript" "Ctsb" "Ctsd" "Cxcl12"
## [41] "Ddah1" "Ddx52" "Dhrs3" "Dscr3"
## [45] "Eif4ebp2" "Eno1b" "F11r" "F2r"
## [49] "Fah" "Fgg" "Fign" "Fignl1"
## [53] "Gckr" "Ghr" "Gpc3" "Grb10"
## [57] "Gstm4" "Gtf2a1" "H3f3b" "Hat1"
## [61] "Hhex" "Hsd17b4" "Igfbp1" "Il1r1"
## [65] "Ivns1abp" "Kcnq1ot1" "Klf13" "Lifr"
## [69] "Lig1" "Lrrc58" "Lrwd1" "Maged2"
## [73] "Maob" "Mcm4" "Mcm5" "Mcm7"
## [77] "Meg3" "Mif" "Mllt10" "Mtfr1"
## [81] "Mttp" "Myh10" "Nek7" "Nmd3"
## [85] "Otud4" "Pdk1" "Pecr" "Peg3"
## [89] "Pgk1" "Pgm2" "Phgdh" "Poldip3"
## [93] "Ppp1cb" "Ppp2r5c" "Ppp6c" "Ppt1"
## [97] "Prdx4" "Prim1" "Psmf1" "Pttg1ip"
## [101] "Pum2" "R3hdm1" "Rabif" "Rad21"
## [105] "Rbm47" "Rbm5" "Rbp4" "Rfc2"
## [109] "Rian" "Rnmt" "Rpl36al" "Rps4l"
## [113] "Rrm1" "Rrm2" "Rsl1d1" "Sdc4"
## [117] "Sept11" "Serpina1b" "Serpina1d" "Serpind1"
## [121] "Serpinf1" "Sgms1" "Sh3bgrl" "Sigirr"
## [125] "Slbp" "Snx5" "Spc25" "Spin1"
## [129] "Spint2" "Srsf1" "Tars" "Tceal8"
## [133] "Tipin" "Tmpo" "Tpm4" "Tpp2"
## [137] "Trf" "Trim71" "Tspan12" "Ttc4"
## [141] "Ttr" "Tuba1a" "Tuba1c" "Tubb4b"
## [145] "Tubb5" "Tubb6" "Ubl3" "Vars"
## [149] "Vcam1" "Wdfy3" "Zc3hav1" "Zfp644"
## [153] "Zmynd8"
var.test.split.inds = (1:ncol(liver_branch_hep)) %in% c(1:ncol(first))
var.test.pvals = sapply(names(hep_DV_pvals), function(gene) {
var.test(liver_branch_hep[gene,var.test.split.inds],
liver_branch_hep[gene,!var.test.split.inds])$p.value
})
# names(var.test.pvals) <- names(hep_DV_pvals)
var.test.fdr = p.adjust(var.test.pvals, method = "BH")
gene = "Cdca8"
plot(hep_DV_wVars[gene,])
var.test.fdr[gene]
## Cdca8
## 0.8618713
g = ggplot(data.frame(index = 1:ncol(liver_branch_hep),
val = liver_branch_hep[gene,],
upper = mean(liver_branch_hep[gene,]) + hep_DV_wVars[gene,],
lower = mean(liver_branch_hep[gene,]) - hep_DV_wVars[gene,]),
aes(x = index, y = val)) +
geom_point() +
xlab("Pseudotime Ranking") +
ylab("Expression") +
ggtitle(gene) +
geom_ribbon(aes(x = index, ymin = lower, ymax = upper), alpha = 0.5) +
geom_vline(xintercept = max(which(var.test.split.inds)), linetype = "dotted") +
# geom_vline(xintercept = ncol(first)) +
theme_classic() +
geom_text(aes(label = text), data = data.frame(index = ncol(first),
val = 11,
text = "Trajectory \n Decision point"),
inherit.aes = TRUE) +
theme(plot.title = element_text(face = "italic")) +
NULL
g
ggsave(g, file = paste0("output/hep_DV_", gene, ".pdf"),height = 4, width = 5)
if (!file.exists("output/GO_list.Rds")) {
library(GO.db)
library(org.Mm.eg.db)
keys = keys(org.Mm.eg.db)
columns(org.Mm.eg.db)
GO_info = AnnotationDbi::select(org.Mm.eg.db, keys=keys, columns = c("SYMBOL", "GO"))
keep = GO_info$SYMBOL %in% rownames(liver)
table(keep)
GO_info_filt = GO_info[keep,]
# at least 10 genes in the term and less than 500
allTerms = names(which(table(GO_info_filt$GO) >= 10 & table(GO_info_filt$GO) <= 500))
GO_info_terms = AnnotationDbi::select(GO.db, columns = columns(GO.db), keys = allTerms)
rownames(GO_info_terms) <- GO_info_terms$GOID
allTermNames = GO_info_terms[allTerms, "TERM"]
names(allTermNames) <- allTerms
GO_list = sapply(allTerms, function(term) {
print(term)
genes = GO_info_filt[GO_info_filt$GO == term, "SYMBOL"]
return(sort(unique(genes[!is.na(genes)])))
}, simplify = FALSE)
names(GO_list) <- allTermNames
saveRDS(GO_list, file = "output/GO_list.Rds")
} else {
GO_list = readRDS("output/GO_list.Rds")
}
GO_list_DV_first = GO_list[unlist(lapply(GO_list, function(x) any(x %in% same_pairs_first[,1])))]
first_DV_genes = list(gain = names(which(sig_class == "gain")),
loss = names(which(sig_class == "loss")))
first_DV_genes_GO = lapply(first_DV_genes, function(set){
genesetGOtest(set, rownames(liver), GO_list_DV_first)
})
gList = lapply(first_DV_genes_GO, function(pval) {
df = data.frame(term = factor(names(pval), levels = c(names(pval), "")),
pval = pval,
qval = p.adjust(pval, method = "BH"))
df$label = df$term
df$label[pval != 1] <- ""
df_sorted = sort_df(df, "pval")[1:10,]
df_sorted$term = factor(df_sorted$term, levels = rev(df_sorted$term))
g = ggplot(df_sorted, aes(x = term, y = -log10(pval), fill = qval < 0.05)) +
theme_classic() +
geom_col() +
coord_flip() +
xlab("") +
ylab(expression("-log10(P-value)")) +
# geom_hline(yintercept = -log10(0.01), colour = "red", linetype = "dashed", size = 1.2) +
scale_fill_manual(values = c("TRUE" = "dimgrey", "FALSE" = "peachpuff")) +
theme(legend.position = "none") +
theme(axis.text.y = element_text(size = 10)) +
theme(title = element_text(hjust = 0.5)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) +
NULL
return(g)
})
gList[[1]] <- gList[[1]] + ggtitle("Gain of variability")
gList[[2]] <- gList[[2]] + ggtitle("Loss of variability")
gList_first_DV = patchwork::wrap_plots(gList, nrow = 1)
ggsave(gList_first_DV, file = "output/first_DV_GO.pdf",height = 4.5, width = 9)
ggsave(gList[[1]], file = "output/first_DV_gain_GO.pdf",height = 4.5, width = 4.5)
ggsave(gList[[2]], file = "output/first_DV_loss_GO.pdf",height = 4.5, width = 4.5)
sig_gain = c(names(which(sig_class == "gain")), names(which(sig_class == "loss")))
chol_DV_wVars_sig_gain = t(DCARSacrossNetwork(liver_branch_chol,
edgelist = cbind(sig_gain,sig_gain),
W = W_chol,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractWcorSequenceOnly = TRUE))
## [1] "Asf1b Asf1b"
## [1] "Birc5 Birc5"
## [1] "Cadm1 Cadm1"
## [1] "Ccnb2 Ccnb2"
## [1] "Cdca3 Cdca3"
## [1] "Eno1 Eno1"
## [1] "H2afz H2afz"
## [1] "Hmgb1 Hmgb1"
## [1] "Hmgb2 Hmgb2"
## [1] "Hprt Hprt"
## [1] "Mad2l1 Mad2l1"
## [1] "Mat2a Mat2a"
## [1] "Mthfd2 Mthfd2"
## [1] "Nasp Nasp"
## [1] "Nmt2 Nmt2"
## [1] "Pfkl Pfkl"
## [1] "Sfpq Sfpq"
## [1] "Snhg4 Snhg4"
## [1] "Trim59 Trim59"
## [1] "Ybx1 Ybx1"
## [1] "Cdca8 Cdca8"
## [1] "Maged1 Maged1"
## [1] "Sypl Sypl"
## [1] "Ccna2 Ccna2"
## [1] "Metap2 Metap2"
## [1] "Pnrc2 Pnrc2"
## [1] "Rpa1 Rpa1"
## [1] "Smc2 Smc2"
## [1] "Fubp1 Fubp1"
## [1] "Mcm3 Mcm3"
## [1] "Mier3 Mier3"
## [1] "Pola2 Pola2"
## [1] "Ptcd3 Ptcd3"
## [1] "Serpinf2 Serpinf2"
## [1] "Vtn Vtn"
## [1] "Clk1 Clk1"
## [1] "Trp53 Trp53"
## [1] "Hmga2 Hmga2"
## [1] "Tacc3 Tacc3"
## [1] "Tpi1 Tpi1"
## [1] "Zfp260 Zfp260"
## [1] "Ect2 Ect2"
## [1] "Haus3 Haus3"
## [1] "Hsph1 Hsph1"
## [1] "Nop56 Nop56"
## [1] "Wrb Wrb"
## [1] "Cenpa Cenpa"
## [1] "Nap1l1 Nap1l1"
## [1] "Rfc3 Rfc3"
## [1] "Spcs3 Spcs3"
## [1] "Ncapd2 Ncapd2"
## [1] "Rcn3 Rcn3"
## [1] "Shcbp1 Shcbp1"
## [1] "Lect2 Lect2"
## [1] "Zfp871 Zfp871"
## [1] "Tnfaip8 Tnfaip8"
## [1] "Cd63 Cd63"
## [1] "Aurka Aurka"
## [1] "Creb3l3 Creb3l3"
## [1] "Dmgdh Dmgdh"
## [1] "Hmgcs2 Hmgcs2"
## [1] "Gimd1 Gimd1"
## [1] "Rpl3-ps1 Rpl3-ps1"
## [1] "Tmem37 Tmem37"
## [1] "Insr Insr"
## [1] "G0s2 G0s2"
## [1] "Nab1 Nab1"
## [1] "Fbxo8 Fbxo8"
rownames(chol_DV_wVars_sig_gain) <- sig_gain
hep_DV_wVars_sig_gain = t(DCARSacrossNetwork(liver_branch_hep,
edgelist = cbind(sig_gain,sig_gain),
W = W_hep,
weightedConcordanceFunction = weightedVarMatrixStats,
verbose = TRUE,
extractWcorSequenceOnly = TRUE))
## [1] "Asf1b Asf1b"
## [1] "Birc5 Birc5"
## [1] "Cadm1 Cadm1"
## [1] "Ccnb2 Ccnb2"
## [1] "Cdca3 Cdca3"
## [1] "Eno1 Eno1"
## [1] "H2afz H2afz"
## [1] "Hmgb1 Hmgb1"
## [1] "Hmgb2 Hmgb2"
## [1] "Hprt Hprt"
## [1] "Mad2l1 Mad2l1"
## [1] "Mat2a Mat2a"
## [1] "Mthfd2 Mthfd2"
## [1] "Nasp Nasp"
## [1] "Nmt2 Nmt2"
## [1] "Pfkl Pfkl"
## [1] "Sfpq Sfpq"
## [1] "Snhg4 Snhg4"
## [1] "Trim59 Trim59"
## [1] "Ybx1 Ybx1"
## [1] "Cdca8 Cdca8"
## [1] "Maged1 Maged1"
## [1] "Sypl Sypl"
## [1] "Ccna2 Ccna2"
## [1] "Metap2 Metap2"
## [1] "Pnrc2 Pnrc2"
## [1] "Rpa1 Rpa1"
## [1] "Smc2 Smc2"
## [1] "Fubp1 Fubp1"
## [1] "Mcm3 Mcm3"
## [1] "Mier3 Mier3"
## [1] "Pola2 Pola2"
## [1] "Ptcd3 Ptcd3"
## [1] "Serpinf2 Serpinf2"
## [1] "Vtn Vtn"
## [1] "Clk1 Clk1"
## [1] "Trp53 Trp53"
## [1] "Hmga2 Hmga2"
## [1] "Tacc3 Tacc3"
## [1] "Tpi1 Tpi1"
## [1] "Zfp260 Zfp260"
## [1] "Ect2 Ect2"
## [1] "Haus3 Haus3"
## [1] "Hsph1 Hsph1"
## [1] "Nop56 Nop56"
## [1] "Wrb Wrb"
## [1] "Cenpa Cenpa"
## [1] "Nap1l1 Nap1l1"
## [1] "Rfc3 Rfc3"
## [1] "Spcs3 Spcs3"
## [1] "Ncapd2 Ncapd2"
## [1] "Rcn3 Rcn3"
## [1] "Shcbp1 Shcbp1"
## [1] "Lect2 Lect2"
## [1] "Zfp871 Zfp871"
## [1] "Tnfaip8 Tnfaip8"
## [1] "Cd63 Cd63"
## [1] "Aurka Aurka"
## [1] "Creb3l3 Creb3l3"
## [1] "Dmgdh Dmgdh"
## [1] "Hmgcs2 Hmgcs2"
## [1] "Gimd1 Gimd1"
## [1] "Rpl3-ps1 Rpl3-ps1"
## [1] "Tmem37 Tmem37"
## [1] "Insr Insr"
## [1] "G0s2 G0s2"
## [1] "Nab1 Nab1"
## [1] "Fbxo8 Fbxo8"
rownames(hep_DV_wVars_sig_gain) <- sig_gain
first_DV_wVars_sig_gain = first_DV_wVars[sig_gain,]
first_wVAR_comparison_df_chol = data.frame(
branch = "chol",
gene = rep(rownames(chol_DV_wVars_sig_gain), ncol(chol_DV_wVars_sig_gain)),
position = rep(1:ncol(chol_DV_wVars_sig_gain), each = nrow(chol_DV_wVars_sig_gain)),
wVAR = c(chol_DV_wVars_sig_gain),
pseudotime = liver_pseudotime_chol[rep(1:ncol(chol_DV_wVars_sig_gain), each = nrow(chol_DV_wVars_sig_gain))]
)
first_wVAR_comparison_df_hep = data.frame(
branch = "hep",
gene = rep(rownames(hep_DV_wVars_sig_gain), ncol(hep_DV_wVars_sig_gain)),
position = rep(1:ncol(hep_DV_wVars_sig_gain), each = nrow(hep_DV_wVars_sig_gain)),
wVAR = c(hep_DV_wVars_sig_gain),
pseudotime = liver_pseudotime_hep[rep(1:ncol(hep_DV_wVars_sig_gain), each = nrow(hep_DV_wVars_sig_gain))]
)
first_wVAR_comparison_df_first = data.frame(
branch = "first",
gene = rep(rownames(first_DV_wVars_sig_gain[sig_gain,]), ncol(first_DV_wVars_sig_gain[sig_gain,])),
position = rep(1:ncol(first_DV_wVars_sig_gain[sig_gain,]), each = nrow(first_DV_wVars_sig_gain[sig_gain,])),
wVAR = c(first_DV_wVars_sig_gain[sig_gain,]),
pseudotime = liver_pseudotime_chol[rep(1:ncol(first_DV_wVars_sig_gain[sig_gain,]), each = nrow(first_DV_wVars_sig_gain[sig_gain,]))]
)
first_wVAR_comparison_df = rbind(
first_wVAR_comparison_df_chol,
first_wVAR_comparison_df_hep,
first_wVAR_comparison_df_first
)
first_wVAR_comparison_df$chol_isnonDE = first_wVAR_comparison_df$gene %in% nonDE_HVG_chol
first_wVAR_comparison_df$hep_isnonDE = first_wVAR_comparison_df$gene %in% nonDE_HVG_hep
ggplot(first_wVAR_comparison_df, aes(x = pseudotime, y = wVAR, group = branch, colour = branch)) +
geom_line(size = 2) +
facet_wrap(chol_isnonDE~gene) +
theme_minimal() +
NULL
g1 = ggplot(subset(first_wVAR_comparison_df, chol_isnonDE & hep_isnonDE),
aes(x = position, y = wVAR, group = branch, colour = branch)) +
geom_line(size = 2) +
facet_wrap(~gene, scales = "free") +
theme_minimal() +
ggtitle("not DE in either") +
geom_vline(xintercept = length(first_pseudotime)) +
NULL
g1
g2 = ggplot(subset(first_wVAR_comparison_df, !chol_isnonDE & hep_isnonDE),
aes(x = position, y = wVAR, group = branch, colour = branch)) +
geom_line(size = 2) +
facet_wrap(~gene, scales = "free") +
theme_minimal() +
ggtitle("DE only in chol") +
geom_vline(xintercept = length(first_pseudotime)) +
NULL
g2
g3 = ggplot(subset(first_wVAR_comparison_df, chol_isnonDE & !hep_isnonDE),
aes(x = position, y = wVAR, group = branch, colour = branch)) +
geom_line(size = 2) +
facet_wrap(~gene, scales = "free") +
theme_minimal() +
ggtitle("DE only in hep") +
geom_vline(xintercept = length(first_pseudotime)) +
NULL
g3
g4 = ggplot(subset(first_wVAR_comparison_df, !chol_isnonDE & !hep_isnonDE),
aes(x = position, y = wVAR, group = branch, colour = branch)) +
geom_line(size = 2) +
facet_wrap(~gene, scales = "free") +
theme_minimal() +
ggtitle("DE in both hep and chol") +
geom_vline(xintercept = length(first_pseudotime)) +
NULL
g4
ggsave(g1, file = "output/first_wVAR_comparison_1.pdf",height = 10, width = 12)
ggsave(g2, file = "output/first_wVAR_comparison_2.pdf",height = 10, width = 12)
ggsave(g3, file = "output/first_wVAR_comparison_3.pdf",height = 10, width = 12)
ggsave(g4, file = "output/first_wVAR_comparison_4.pdf",height = 10, width = 12)
first_wVAR_comparison_df$branch = factor(first_wVAR_comparison_df$branch, levels = c("hep", "chol", "first"))
genesOfInterest = c("Birc5","H2afz","Tacc3", "Hmgcs2")
gList = sapply(genesOfInterest, function(geneOfInterest) {
g5 = ggplot(subset(first_wVAR_comparison_df, gene %in% geneOfInterest),
aes(x = position, y = wVAR, group = branch, colour = branch,
alpha = branch, size = branch)) +
geom_line() +
theme_minimal() +
theme(legend.position = "none") +
ggtitle(geneOfInterest) +
theme(panel.grid = element_blank()) +
xlab("Pseudotime ranking") +
ylab("Local weighted variance") +
geom_vline(xintercept = length(first_pseudotime)) +
scale_colour_tableau() +
scale_alpha_manual(values = c("chol" = 0.5, "hep" = 0.5, "first" = 1)) +
scale_size_manual(values = c("chol" = 1, "hep" = 1, "first" = 2)) +
theme(axis.title = element_text(size = 15)) +
theme(plot.title = element_text(face = "italic")) +
theme(axis.line.x = element_line(color="black", size = 0.5),
axis.line.y = element_line(color="black", size = 0.5)) +
xlab(ifelse(geneOfInterest == genesOfInterest[1],"Pseudotime ranking","")) +
ylab(ifelse(geneOfInterest == genesOfInterest[1],"Local weighted variance","")) +
NULL
return(g5)
}, simplify = FALSE)
gList_DV_lines = wrap_plots(gList, nrow = 1)
gList_DV_lines
ggsave(gList_DV_lines, file = "output/first_DV_gain_lines_examples.pdf",
height = 3, width = 10)
# calculate the wcors for all the ordering genes pairs
if (!file.exists("output/exprs_HVG.RData")) {
hep_exprs_HVG = exprs(liver)[nonDE_HVG_hep, names(liver_pseudotime_hep)]
chol_exprs_HVG = exprs(liver)[nonDE_HVG_chol, names(liver_pseudotime_chol)]
save(hep_exprs_HVG, chol_exprs_HVG, file = "output/exprs_HVG.RData")
} else {
load("output/exprs_HVG.RData")
}
# HVG calculated over entire trajectory cells
pairs = t(combn(sort(HVG), 2))
rownames(pairs) <- apply(pairs,1,paste0, collapse = "_")
pairs_hep_all = t(combn(sort(nonDE_HVG_hep), 2))
rownames(pairs_hep_all) <- apply(pairs_hep_all,1,paste0, collapse = "_")
pairs_chol_all = t(combn(sort(nonDE_HVG_chol), 2))
rownames(pairs_chol_all) <- apply(pairs_chol_all,1,paste0, collapse = "_")
if (!file.exists("output/all_wcors.RData")) {
pairs_hep_all = t(combn(sort(nonDE_HVG_hep), 2))
rownames(pairs_hep_all) <- apply(pairs_hep_all,1,paste0, collapse = "_")
pairs_chol_all = t(combn(sort(nonDE_HVG_chol), 2))
rownames(pairs_chol_all) <- apply(pairs_chol_all,1,paste0, collapse = "_")
W_hep = weightMatrix(ncol(hep_exprs_HVG), span = 0.25)
if (parallel) {
pairs_hep_split = split.data.frame(pairs_hep_all, rep(1:ncores, length.out = nrow(pairs_hep_all)))
wcors_hep_raw = mclapply(pairs_hep_split, function(pairs){
t(DCARSacrossNetwork(hep_exprs_HVG,
edgelist = pairs,
W = W_hep,
extractWcorSequenceOnly = TRUE,
weightedConcordanceFunction = weightedSpearman,
verbose = TRUE))
}, mc.cores = ncores)
wcors_hep = do.call(rbind, wcors_hep_raw)[rownames(pairs_hep_all),]
} else {
wcors_hep = t(DCARSacrossNetwork(hep_exprs_HVG,
edgelist = pairs_hep_all,
W = W_hep,
extractWcorSequenceOnly = TRUE,
weightedConcordanceFunction = weightedSpearman,
verbose = TRUE))
}
W_chol = weightMatrix(ncol(chol_exprs_HVG), span = 0.25)
if (parallel) {
pairs_chol_split = split.data.frame(pairs_chol_all, rep(1:ncores, length.out = nrow(pairs_chol_all)))
wcors_chol_raw = mclapply(pairs_chol_split, function(pairs){
t(DCARSacrossNetwork(chol_exprs_HVG,
edgelist = pairs,
W = W_chol,
extractWcorSequenceOnly = TRUE,
weightedConcordanceFunction = weightedSpearman,
verbose = TRUE))
}, mc.cores = ncores)
wcors_chol = do.call(rbind, wcors_chol_raw)[rownames(pairs_chol_all),]
} else {
wcors_chol = t(DCARSacrossNetwork(chol_exprs_HVG,
edgelist = pairs_chol_all,
W = W_chol,
extractWcorSequenceOnly = TRUE,
weightedConcordanceFunction = weightedSpearman,
verbose = TRUE))
}
save(wcors_hep, wcors_chol, W_chol, W_hep, pairs_chol_all, pairs_hep_all, file = "output/all_wcors.RData")
} else {
load("output/all_wcors.RData")
}
if (!file.exists("output/hep_permstats.Rdata")) {
set.seed(500)
hep_stats_all = apply(wcors_hep,1,sd)
hep_globalCor_all = apply(pairs_hep_all, 1, function(x)
cor(hep_exprs_HVG[x[1],], hep_exprs_HVG[x[2],], method = "spearman")
)
hep_sample = DCARS::stratifiedSample(hep_globalCor_all, length = 500)
if (parallel) {
hep_permstats_raw = mclapply(as.list(hep_sample), function(h) {
DCARSacrossNetwork(hep_exprs_HVG,
edgelist = pairs_hep_all[h, , drop = FALSE],
W = W_hep,
weightedConcordanceFunction = weightedSpearman,
verbose = TRUE,
extractPermutationTestStatistics = TRUE,
niter = 1000)
}, mc.cores = ncores)
hep_permstats = lapply(hep_permstats_raw, unlist)
names(hep_permstats) <- rownames(pairs_hep_all[hep_sample,])
} else {
hep_permstats_raw = DCARSacrossNetwork(hep_exprs_HVG,
edgelist = pairs_hep_all[hep_sample,],
W = W_hep,
weightedConcordanceFunction = weightedSpearman,
verbose = TRUE,
extractPermutationTestStatistics = TRUE,
niter = 500)
hep_permstats = unlist(hep_permstats_raw, recursive = FALSE)
names(hep_permstats) <- rownames(pairs_hep_all[hep_sample,])
}
save(hep_permstats, hep_globalCor_all, hep_stats_all, file = "output/hep_permstats.Rdata")
} else {
load("output/hep_permstats.Rdata")
}
if (!file.exists("output/hep_p_all.Rds")) {
hep_p_all = estimatePvaluesSpearman(hep_stats_all,
hep_globalCor_all,
hep_permstats,
usenperm = FALSE,
nperm = 10000,
plot = TRUE,
maxDist = 0.1,
verbose = TRUE)
hep_p_all$fdr <- p.adjust(hep_p_all$pval, method = "BH")
saveRDS(hep_p_all, file = "output/hep_p_all.Rds")
} else {
hep_p_all <- readRDS("output/hep_p_all.Rds")
}
FDR_level = 0.2
Compare estimated p-values with full 10,000 permutations for all gene pairs. This takes a very long time to run, was performed on the cluster.
if (!file.exists("output/hep_full_pvals.Rds")) {
source("hep_all_permutations.R")
}
hep_full_pvals = readRDS("output/hep_full_pvals.Rds")
plot(-log10(hep_full_pvals), -log10(hep_p_all$pval)); abline(c(0,1))
cor(-log10(hep_full_pvals), -log10(hep_p_all$pval),
method = "spearman", use = "p")
## [1] 0.9948061
hep_full_pvals[hep_full_pvals == 0] <- 1/10001
full_p_df = data.frame(
gene = names(hep_full_pvals),
pval_all = hep_full_pvals,
pval_est = hep_p_all$pval
)
full_p_df$loess = loess(I(-log10(pval_est)) ~ I(-log10(pval_all)), data = full_p_df, span = 0.3)$fitted
full_p_df = reshape::sort_df(full_p_df, "pval_all")
g = ggplot(full_p_df, aes(x = -log10(pval_all), y = -log10(pval_est))) +
geom_point(colour = "darkgrey", alpha = 0.7) +
geom_line(aes(y = loess), colour = "red") +
theme_classic() +
xlab("P-value (10,000 permutations each)") +
ylab("Estimated P-value") +
geom_vline(xintercept = 2, linetype = "dashed", colour = "grey") +
geom_hline(yintercept = 2, linetype = "dashed", colour = "grey") +
geom_abline(slope = 1, intercept = 0, colour = "black") +
geom_text(x = 0.15, y = 5, label = paste0("Spearman \ncorrelation: \n ",
round(cor(-log10(hep_full_pvals), -log10(hep_p_all$pval),
method = "spearman", use = "p"), 3))) +
NULL
g
ggsave(g, file = "output/hep_full_pval_scatterplot.pdf",height = 6, width = 6)
fit = lm(I(-log10(pval_est)) ~ I(-log10(pval_all)), data = full_p_df)
fit$coef
## (Intercept) I(-log10(pval_all))
## 0.005005454 1.001514292
Above was done with weighted Spearman, also perform with weightedPearson, MIC and brownian distance correlation. The latter two with a blocked weightmatrix due to implementation.
# calculate global values for each and take stratified sample of size 100 from
# each and take the union of these
if (!file.exists("output/test_statistics_output.RData")) {
source("test_statistics_hep.R")
}
load("output/test_statistics_global.RData")
load("output/test_statistics_output.RData")
test_statistics_p_all_all = do.call(rbind, test_statistics_p_all)
test_statistics_p_all_all$testing <- rep(names(test_statistics_p_all),
times = unlist(lapply(test_statistics_p_all, nrow)))
test_statistics_pvals = do.call(cbind,lapply(test_statistics_p_all, "[", "pval"))
colnames(test_statistics_pvals) <- names(test_statistics_p_all)
test_statistics_cor = cor(-log10(test_statistics_pvals), method = "spearman")
g = ggpairs(-log10(test_statistics_pvals),
lower = list(continuous = wrap("points", alpha = 0.3, size=0.1),
combo = wrap("dot", alpha = 0.4, size=0.2)),
title = "-log10(P-value)"
)
g
ggsave(g, file = "output/test_statistics_pairs.pdf",height = 8, width = 8)
pdf("output/test_statistics_corrplot.pdf", height = 6, width = 6)
g_cor = corrplot(test_statistics_cor, order = "hclust")
dev.off()
## quartz_off_screen
## 2
pdf("output/test_statistics_upset.pdf", height = 5, width = 8, onefile = FALSE)
upset(data = as.data.frame(as.matrix(1*(apply(test_statistics_pvals,2,p.adjust, method = "BH") < 0.2))),
sets = colnames(test_statistics_pvals))
dev.off()
## quartz_off_screen
## 2
# distribution graphs
test_statistics_distn = data.frame(
quantile_999 = unlist(lapply(test_statistics_perms, function(x) unlist(lapply(x, quantile, probs = 0.999)))),
quantile_90 = unlist(lapply(test_statistics_perms, function(x) unlist(lapply(x, quantile, probs = 0.90)))),
quantile_95 = unlist(lapply(test_statistics_perms, function(x) unlist(lapply(x, quantile, probs = 0.95)))),
quantile_99 = unlist(lapply(test_statistics_perms, function(x) unlist(lapply(x, quantile, probs = 0.99)))),
global = unlist(lapply(test_statistics_global, "[", test_statistics_sampled_ind)),
test_statistic = rep(names(test_statistics_global), times = unlist(lapply(test_statistics_perms, length)))
)
test_statistics_distn <- reshape::sort_df(test_statistics_distn, "global")
test_statistics_distn$quantile_999_fitted = unsplit(lapply(split.data.frame(test_statistics_distn, test_statistics_distn$test_statistic),
function(df){
loess(quantile_999 ~ global, data = df)$fitted
}), test_statistics_distn$test_statistic)
test_statistics_distn$quantile_90_fitted = unsplit(lapply(split.data.frame(test_statistics_distn, test_statistics_distn$test_statistic),
function(df){
loess(quantile_90 ~ global, data = df)$fitted
}), test_statistics_distn$test_statistic)
test_statistics_distn$quantile_95_fitted = unsplit(lapply(split.data.frame(test_statistics_distn, test_statistics_distn$test_statistic),
function(df){
loess(quantile_95 ~ global, data = df)$fitted
}), test_statistics_distn$test_statistic)
test_statistics_distn$quantile_99_fitted = unsplit(lapply(split.data.frame(test_statistics_distn, test_statistics_distn$test_statistic),
function(df){
loess(quantile_99 ~ global, data = df)$fitted
}), test_statistics_distn$test_statistic)
test_statistics_distn$test_statistic = factor(test_statistics_distn$test_statistic,
levels = names(test_statistics_global))
test_statistics_nicecolours = RColorBrewer::brewer.pal(name = "Blues", 9)
names(test_statistics_nicecolours) <- c(rep("", 5), "0.90","0.95","0.99","0.999")
g = ggplot(test_statistics_distn, aes(x = global)) +
geom_point(aes(y = quantile_999, colour = "0.999"),
alpha = 0.5) +#, colour = test_statistics_nicecolours[9]) +
geom_line(aes(y = quantile_999_fitted), colour = test_statistics_nicecolours[9]) +
geom_point(aes(y = quantile_99, colour = "0.99"),
alpha = 0.5) + #, colour = test_statistics_nicecolours[8]) +
geom_line(aes(y = quantile_99_fitted), colour = test_statistics_nicecolours[8]) +
geom_point(aes(y = quantile_95, colour = "0.95"),
alpha = 0.5) + #, colour = test_statistics_nicecolours[7]) +
geom_line(aes(y = quantile_95_fitted), colour = test_statistics_nicecolours[7]) +
geom_point(aes(y = quantile_90, colour = "0.90"),
alpha = 0.5) + #, colour = test_statistics_nicecolours[6]) +
geom_line(aes(y = quantile_90_fitted), colour = test_statistics_nicecolours[6]) +
facet_wrap(~test_statistic, scales = "free", ncol = 2) +
theme_classic() +
xlab("Global higher order statistic value") +
ylab("Permuted scHOT test statistic quantile") +
scale_colour_manual(name="Quantiles",values=test_statistics_nicecolours[6:9]) +
theme(legend.position = "bottom") +
NULL
ggsave(g, file = "output/test_statistics_pvalEstimation.pdf",
height = 12, width = 10)
Run on cluster
if (!file.exists("output/span_output.RData")) {
source("span_hep.R")
}
load("output/span_output.RData")
span_p_all_all = do.call(rbind, span_p_all)
span_p_all_all$testing <- rep(names(span_p_all),
times = unlist(lapply(span_p_all, nrow)))
span_pvals = do.call(cbind,lapply(span_p_all, "[", "pval"))
colnames(span_pvals) <- names(span_p_all)
span_cor = cor(-log10(span_pvals), method = "spearman")
pdf("output/span_corrplot.pdf", height = 10, width = 10)
g_cor = corrplot(span_cor, order = "original")
dev.off()
## quartz_off_screen
## 2
pdf("output/span_upset.pdf", height = 10, width = 16, onefile = FALSE)
upset(data = as.data.frame(as.matrix(1*(apply(span_pvals,2,p.adjust, method = "BH") < FDR_level))),
sets = colnames(span_pvals),
order.by = "freq",
text.scale = 2)
dev.off()
## quartz_off_screen
## 2
# distribution graphs
span_distn = data.frame(
quantile_999 = unlist(lapply(span_perms, function(x) unlist(lapply(x, quantile, probs = 0.999)))),
quantile_90 = unlist(lapply(span_perms, function(x) unlist(lapply(x, quantile, probs = 0.90)))),
quantile_95 = unlist(lapply(span_perms, function(x) unlist(lapply(x, quantile, probs = 0.95)))),
quantile_99 = unlist(lapply(span_perms, function(x) unlist(lapply(x, quantile, probs = 0.99)))),
global = unlist(lapply(span_global, "[", span_sampled_ind)),
span = rep(names(span_perms), times = unlist(lapply(span_perms, length)))
)
span_distn <- reshape::sort_df(span_distn, "global")
span_distn$quantile_999_fitted = unsplit(lapply(split.data.frame(span_distn, span_distn$span),
function(df){
loess(quantile_999 ~ global, data = df)$fitted
}), span_distn$span)
span_distn$quantile_90_fitted = unsplit(lapply(split.data.frame(span_distn, span_distn$span),
function(df){
loess(quantile_90 ~ global, data = df)$fitted
}), span_distn$span)
span_distn$quantile_95_fitted = unsplit(lapply(split.data.frame(span_distn, span_distn$span),
function(df){
loess(quantile_95 ~ global, data = df)$fitted
}), span_distn$span)
span_distn$quantile_99_fitted = unsplit(lapply(split.data.frame(span_distn, span_distn$span),
function(df){
loess(quantile_99 ~ global, data = df)$fitted
}), span_distn$span)
span_distn$span = factor(span_distn$span,levels = names(span_perms))
span_nicecolours = RColorBrewer::brewer.pal(name = "Blues", 9)
names(span_nicecolours) <- c(rep("", 5), "0.90","0.95","0.99","0.999")
g = ggplot(span_distn, aes(x = global)) +
geom_point(aes(y = quantile_999, colour = "0.999"),
alpha = 0.5) +#, colour = span_nicecolours[9]) +
geom_line(aes(y = quantile_999_fitted), colour = span_nicecolours[9]) +
geom_point(aes(y = quantile_99, colour = "0.99"),
alpha = 0.5) + #, colour = span_nicecolours[8]) +
geom_line(aes(y = quantile_99_fitted), colour = span_nicecolours[8]) +
geom_point(aes(y = quantile_95, colour = "0.95"),
alpha = 0.5) + #, colour = span_nicecolours[7]) +
geom_line(aes(y = quantile_95_fitted), colour = span_nicecolours[7]) +
geom_point(aes(y = quantile_90, colour = "0.90"),
alpha = 0.5) + #, colour = span_nicecolours[6]) +
geom_line(aes(y = quantile_90_fitted), colour = span_nicecolours[6]) +
# geom_point(aes(y = quantile_99_fitted)) +
facet_wrap(~span, scales = "free", ncol = 4) +
theme_classic() +
xlab("Global higher order statistic value") +
ylab("Permuted scHOT test statistic quantile") +
scale_colour_manual(name="Quantiles",values=span_nicecolours[6:9]) +
theme(legend.position = "bottom") +
NULL
g
ggsave(g, file = "output/span_pvalEstimation.pdf",
height = 12, width = 14)
# what do the big ones and small ones look like 0.35 and below, and then 0.40 and above, and both
span_selection = as.data.frame(as.matrix(1*(apply(span_pvals,2,p.adjust, method = "BH") < FDR_level)))
any_span_selected = names(which(apply(span_selection,1,any)))
low_span_selected = names(which(apply(span_selection[,1:7],1,any)))
high_span_selected = names(which(apply(span_selection[,8:ncol(span_selection)],1,any)))
both_span_selected = intersect(low_span_selected, high_span_selected)
low_span_selected_only = setdiff(low_span_selected, high_span_selected)
high_span_selected_only = setdiff(high_span_selected, low_span_selected)
matplot(t(wcors_hep[low_span_selected_only,]), type = "l", col = "black", lty = 1)
matplot(t(wcors_hep[high_span_selected_only,]), type = "l", col = "black", lty = 1)
i = 10 # diff number
wcors_hep_diff = t(apply(wcors_hep[any_span_selected,],1,function(x) loess(I(diff(x,i)) ~ I(1:(length(x) - i)))$fitted))
wcors_hep_diff_signchanges = apply(wcors_hep_diff, 1, function(x) length(unique(x < 0)))
prop.table(table(wcors_hep_diff_signchanges[low_span_selected_only]))
##
## 1 2
## 0.03389831 0.96610169
prop.table(table(wcors_hep_diff_signchanges[high_span_selected_only]))
##
## 1 2
## 0.1268116 0.8731884
matplot(t(wcors_hep_diff[low_span_selected_only,]), type = "l", col = "black", lty = 1)
matplot(t(wcors_hep_diff[high_span_selected_only,]), type = "l", col = "red", lty = 1, add = TRUE)
pdf("output/span_slopes_density.pdf",height = 8, width = 8)
plot(density(wcors_hep_diff[low_span_selected_only,]), ylim = c(0,30), col = "blue",
main = "Density of slopes of local weighted correlation", lwd = 2)
lines(density(wcors_hep_diff[high_span_selected_only,]), col = "red", lwd = 2)
lines(density(wcors_hep_diff[any_span_selected,]), col = "black", lty = "dotted", lwd = 2)
legend("topright", legend = c("Selected with low span only", "Selected with high span only",
"Selected with any span"),
col = c("blue","red","black"),
lty = c("solid","solid","dotted"),
lwd = 2,
bty = "n")
dev.off()
## quartz_off_screen
## 2
Takes a long time so was run on the cluster.
if (!file.exists("output/subset_output_weightmatch_noperms.RData")) {
source("subset_hep_weightmatch.R")
}
load("output/subset_output_weightmatch_noperms.RData")
subset_p_all_all = do.call(rbind, subset_p_all)
subset_p_all_all$testing <- rep(names(subset_p_all),
times = unlist(lapply(subset_p_all, nrow)))
subset_p_all_all$testingType = unlist(lapply(strsplit(as.character(subset_p_all_all$testing), "_"), "[", 2))
subset_pvals = do.call(cbind,lapply(subset_p_all, "[", "pval"))
colnames(subset_pvals) <- names(subset_p_all)
subset_p_all_all$fullpval = hep_p_all[as.character(subset_p_all_all$genepair),"pval"]
subset_p_all_all$fullfdr = hep_p_all[as.character(subset_p_all_all$genepair),"fdr"]
subset_p_all_all$select = rep("Neither", nrow(subset_p_all_all))
subset_p_all_all[subset_p_all_all$fdr < FDR_level &
subset_p_all_all$fullfdr < FDR_level,"select"] <- "Both"
subset_p_all_all[subset_p_all_all$fdr > FDR_level &
subset_p_all_all$fullfdr < FDR_level,"select"] <- "Full only"
subset_p_all_all[subset_p_all_all$fdr < FDR_level &
subset_p_all_all$fullfdr > FDR_level,"select"] <- "Subset only"
subset_p_all_all$testing <- factor(subset_p_all_all$testing,
levels = rev(gtools::mixedsort(unique(as.character(subset_p_all_all$testing)))))
subset_p_all_all$testing_iter = gsub("_", "", gsub("subset_0.[0-9]0", "", subset_p_all_all$testing))
subset_p_all_all$testingType <- factor(subset_p_all_all$testingType,
levels = rev(gtools::mixedsort(unique(as.character(subset_p_all_all$testingType)))))
g = ggplot(subset_p_all_all, aes(x = -log10(fullpval), y = -log10(pval))) +
geom_scattermore(colour = "grey", pointsize = 1.5) +
geom_point(aes(colour = select), data = subset(subset_p_all_all, select != "Neither"),
size = 1) +
geom_abline() +
# facet_grid(testingType ~ testing, drop = TRUE) +
facet_grid(testingType ~ testing_iter) +
# facet_wrap( ~ testing, ncol = 10) +
# gsub("_", "", gsub("subset_0.[0-9]0", "", "subset_0.90_1"))
xlab("Full data -log10(p-value)") +
ylab("Subset -log10(p-value)") +
scale_colour_manual(values = c("Neither" = "grey",
"Both" = "black",
"Full only" = "blue",
"Subset only" = "red")) +
theme_classic() +
theme(legend.position = "right") +
guides(colour = guide_legend("FDR adjusted\nP-value < 0.2 for:")) +
theme(strip.text.x = element_blank()) +
NULL
ggsave(g, file = "output/subset_scatterplot.pdf", height = 8, width = 18)
pdf("output/subset_qqplot.pdf",height = 3, width = 12.5)
par(mfrow=c(1,5))
for (i in 1:50) {
if (i %% 10 == 1) {
plot(sort(-log10(hep_p_all$pval)), sort(-log10(subset_pvals[,i])), type = "l",
main = paste0(gsub("subset_0.", "", names(subset_p_all)[i]), "% subset"),
xlab = "Full data quantiles",
ylab = "Subset data quantiles");abline(c(0,1), col = "red")
} else {
points(sort(-log10(hep_p_all$pval)), sort(-log10(subset_pvals[,i])), type = "l")
}
}
dev.off()
## quartz_off_screen
## 2
# calculate inclusion frequency for each random subset type
subset_qvals <- apply(subset_pvals,2,p.adjust, method = "BH")
inclusion_frequency = t(apply(subset_qvals, 1, function(x) tapply(x, substring(names(subset_p_all),1,11), function(y)mean(y < FDR_level))))
subset_inclusion_frequency_df = data.frame(
inclusion_frequency,
full_pval = hep_p_all$pval,
full_fdr = hep_p_all$fdr
)
gList = sapply(unique(substring(names(subset_p_all),1,11)), function(sub) {
ggplot(subset_inclusion_frequency_df, aes(y = get(sub), x = -log10(full_pval))) +
geom_point(aes(colour = full_fdr < FDR_level), show.legend = sub == "subset_0.50", alpha = 0.5) +
geom_smooth(colour = "black") +
scale_colour_manual(values = c("TRUE" = "red", "FALSE" = "grey")) +
theme_classic() +
xlab(ifelse(sub == "subset_0.90", "-log10(P-value) from full dataset", "")) +
ylab(ifelse(sub == "subset_0.90", "Proportion of selection at\nFDR adjusted P-value < 0.2 in subset", "")) +
ylim(c(0,1)) +
ggtitle(paste0(gsub("subset_0.","", sub), "% subset")) +
guides(colour = guide_legend("FDR adjusted\nP-value < 0.2")) +
NULL
}, simplify = FALSE)
g = wrap_plots(gList, ncol = 5)
ggsave(g, file = "output/subset_inclusion_frequency.pdf", height = 4, width = 16)
subset_cor = cor(-log10(cbind(all = hep_p_all$pval, subset_pvals)), method = "spearman")
colnames(subset_cor) <- unlist(lapply(strsplit(gsub("subset_", "", colnames(subset_cor)), "_"), "[",1))
colnames(subset_cor)[!(1:ncol(subset_cor) %in% c(1, 6, 16, 26, 36, 46))] <- ""
rownames(subset_cor) <- colnames(subset_cor)
pdf("output/subset_corrplot.pdf", height = 5, width = 5)
g_cor = corrplot(subset_cor,
order = "original",
tl.srt = 45)
dev.off()
## quartz_off_screen
## 2
subset_cor_with_all = data.frame(
cor = subset_cor[1,],
testingType = paste0(c("100", 100*as.numeric(unlist(lapply(strsplit(colnames(subset_pvals), "_"), "[", 2)))), "%"),
testing = c("All", colnames(subset_pvals))
)
subset_cor_with_all$testing <- factor(subset_cor_with_all$testing, levels =
c("All", colnames(subset_pvals)))
subset_cor_with_all$testingType <- factor(subset_cor_with_all$testingType,
levels = rev(gtools::mixedsort(as.character(unique(subset_cor_with_all$testingType)))))
g1 = ggplot(subset_cor_with_all, aes(x = testing, y = cor)) +
geom_col(aes(fill = testingType)) +
theme_classic() +
scale_fill_viridis_d() +
ylab("Correlation of -log10(p-value) with full dataset") +
xlab("Subset percentage scenario") +
theme(axis.text.x = element_text(angle = 90)) +
guides(fill = guide_legend("Subset\npercentage")) +
NULL
g1
ggsave(g1,file = "output/subset_correlations.pdf",height = 6,width = 10)
library(mgcv) # load the package
# randomly subset
wcors_subs = replicate(50, {
sub_ind = sort(sample(1:ncol(hep_exprs_HVG), ceiling(0.6*ncol(hep_exprs_HVG))))
DCARS(hep_exprs_HVG[,sub_ind],
xname = pairs_hep_all["Cdt1_Top2a",1],
yname = pairs_hep_all["Cdt1_Top2a",2],
W = W_hep[sub_ind,sub_ind],
weightedConcordanceFunction = weightedSpearman,
extractWcorSequenceOnly = TRUE)
})
gam_rank = rep(1:nrow(wcors_subs), 50)
gam_response = c(wcors_subs)
gam_fit = gam(gam_response ~ s(gam_rank))
# plot(gam_rank, gam_response)
plot(gam_fit)
points(gam_rank, gam_response, pch = 16, cex = 0.1)
summary(gam_fit)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## gam_response ~ s(gam_rank)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1505865 0.0006213 -242.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(gam_rank) 8.756 8.983 12470 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.901 Deviance explained = 90.2%
## GCV = 0.0047325 Scale est. = 0.0047288 n = 12250
if (!file.exists("output/chol_permstats.Rdata")) {
set.seed(500)
chol_stats_all = apply(wcors_chol,1,sd)
chol_globalCor_all = apply(pairs_chol_all, 1, function(x)
cor(chol_exprs_HVG[x[1],], chol_exprs_HVG[x[2],], method = "spearman")
)
chol_sample = DCARS::stratifiedSample(chol_globalCor_all, length = 500)
if (parallel) {
chol_permstats_raw = mclapply(as.list(chol_sample), function(h) {
DCARSacrossNetwork(chol_exprs_HVG,
edgelist = pairs_chol_all[h, , drop = FALSE],
W = W_chol,
weightedConcordanceFunction = weightedSpearman,
verbose = TRUE,
extractPermutationTestStatistics = TRUE,
niter = 1000)
}, mc.cores = ncores)
chol_permstats = lapply(chol_permstats_raw, unlist)
names(chol_permstats) <- rownames(pairs_chol_all[chol_sample,])
} else {
chol_permstats_raw = DCARSacrossNetwork(chol_exprs_HVG,
edgelist = pairs_chol_all[chol_sample,],
W = W_chol,
weightedConcordanceFunction = weightedSpearman,
verbose = TRUE,
extractPermutationTestStatistics = TRUE,
niter = 500)
chol_permstats = unlist(chol_permstats_raw, recursive = FALSE)
names(chol_permstats) <- rownames(pairs_chol_all[chol_sample,])
}
save(chol_permstats, chol_globalCor_all, chol_stats_all, file = "output/chol_permstats.Rdata")
} else {
load("output/chol_permstats.Rdata")
}
if (!file.exists("output/chol_p_all.Rds")) {
chol_p_all = estimatePvaluesSpearman(chol_stats_all,
chol_globalCor_all,
chol_permstats,
usenperm = FALSE,
nperm = 10000,
plot = TRUE,
maxDist = 0.1,
verbose = TRUE)
chol_p_all$fdr <- p.adjust(chol_p_all$pval, method = "BH")
saveRDS(chol_p_all, file = "output/chol_p_all.Rds")
} else {
chol_p_all <- readRDS("output/chol_p_all.Rds")
}
globalCors_hep = hep_p_all$globalCor
names(globalCors_hep) <- rownames(hep_p_all)
globalCors_chol = chol_p_all$globalCor
names(globalCors_chol) <- rownames(chol_p_all)
permstatsDF_hep = data.frame(
branch = "hep",
genepair = rep(names(hep_permstats),
times = unlist(lapply(hep_permstats, function(x) length(unlist(x))))),
stat = unlist(hep_permstats))
permstatsDF_hep$globalCor = globalCors_hep[as.character(permstatsDF_hep$genepair)]
df_99_hep = data.frame(
branch = "hep",
globalCor = globalCors_hep[names(hep_permstats)], stat = unlist(lapply(hep_permstats, quantile, 0.99)))
df_99_hep$fitted = loess(stat ~ globalCor, data = df_99_hep)$fitted
permstatsDF_chol = data.frame(
branch = "chol",
genepair = rep(names(chol_permstats), times = unlist(lapply(chol_permstats, function(x) length(unlist(x))))),
stat = unlist(chol_permstats))
permstatsDF_chol$globalCor = globalCors_chol[as.character(permstatsDF_chol$genepair)]
df_99_chol = data.frame(
branch = "chol",
globalCor = globalCors_chol[names(chol_permstats)], stat = unlist(lapply(chol_permstats, quantile, 0.99)))
df_99_chol$fitted = loess(stat ~ globalCor, data = df_99_chol)$fitted
permstatsDF = rbind(permstatsDF_hep,
permstatsDF_chol)
df_99 = rbind(df_99_hep,
df_99_chol)
g = ggplot(permstatsDF, aes(x = globalCor, y = stat, colour = branch)) +
geom_scattermore() +
geom_point(aes(group = branch, colour = branch), data = df_99) +
geom_line(aes(y = fitted, group = branch, colour = branch), data = df_99) +
theme_classic() +
ylab("Permuted test statistic") +
xlab("Global correlation") +
theme(panel.grid = element_blank())
g
ggsave(g, file = "output/stats_globalCor.pdf", height = 8, width = 8)
pairs_hep_sig = pairs_hep_all[rownames(subset(hep_p_all, fdr < FDR_level)),]
wcors_hep_sig = wcors_hep[rownames(pairs_hep_sig),]
dim(pairs_hep_sig)
## [1] 224 2
length(unique(c(pairs_hep_sig)))
## [1] 136
wcors_hep_sig_smooth = t(apply(wcors_hep_sig,1,function(x){
loess(x ~ I(1:length(x)))$fitted
}))
hc = hclust(dist(wcors_hep_sig_smooth, method = "maximum"), method = "complete") #ward.D2
wcors_hep_sig_clusterGenes = cutreeDynamic(
hc,
minClusterSize = 10,
method = "tree",
deepSplit = TRUE,
useMedoids = TRUE
)
k_hep = length(unique(wcors_hep_sig_clusterGenes))
k_hep
## [1] 10
wcors_hep_sig_clusterGenes = cutree(hc, k = k_hep-1)
names(wcors_hep_sig_clusterGenes) <- hc$labels
saveRDS(wcors_hep_sig_clusterGenes, file = "output/wcors_hep_sig_clusterGenes.Rds")
hep_sig_table = cbind(hep_p_all[rownames(pairs_hep_sig),], cluster = wcors_hep_sig_clusterGenes)
wcors_hep_sig_clusterGenes_list = lapply(split(names(wcors_hep_sig_clusterGenes), wcors_hep_sig_clusterGenes), function(h) sort(unique(c(pairs_hep_all[h,]))))
length(wcors_hep_sig_clusterGenes_list)
## [1] 9
wcors_hep_sig_mean = apply(wcors_hep_sig, 2, function(x){
tapply(x,wcors_hep_sig_clusterGenes, mean)
})
sigOrder = rev(paste0("Cluster ",rownames(wcors_hep_sig_mean)[heatmap.2(wcors_hep_sig_mean)$rowInd]))
pdf("output/clustered_wcor_hep.pdf", height = 10, width = 8)
heatmap.2(wcors_hep_sig_mean, trace = "n", Colv = FALSE,
col = colorRampPalette(c("blue","white","red"))(100),
margins = c(8,8),
main = "Hepatocyte")
dev.off()
## quartz_off_screen
## 2
wcors_hep_sigLong = reshape::melt(t(wcors_hep_sig))
wcors_hep_sigLong$cluster = paste0("Cluster ", wcors_hep_sig_clusterGenes[as.character(wcors_hep_sigLong$X2)])
wcors_hep_sigLong$cluster <- factor(wcors_hep_sigLong$cluster,
levels = sigOrder
)
gh = geom_hline(yintercept = 0, linetype = "dotted", colour = "red", size = 1)
g = ggplot(
wcors_hep_sigLong,
aes(x = X1, y = value, group = X2)) +
theme_minimal() +
theme(panel.grid = element_blank()) +
ylim(c(-1,1)) +
theme(axis.line.x = element_line(color="black", size = 0.5),
axis.line.y = element_line(color="black", size = 0.5)) +
xlab("Pseudotime") +
ylab("Local weighted correlation") +
NULL
numSummary_df = data.frame(cluster = sort(unique(wcors_hep_sigLong$cluster)),
num = tapply(wcors_hep_sigLong$X2, wcors_hep_sigLong$cluster, length)/max(wcors_hep_sigLong$X1))
g1 = g +
geom_line() +
theme(axis.text.x = element_blank()) +
theme(axis.line.y = element_line()) +
facet_wrap(~cluster, ncol = length(unique(wcors_hep_sigLong$cluster)), scales = "free") +
theme(strip.text = element_text(size = 12)) +
geom_text(data = numSummary_df,
aes(label = paste0(num, " gene pairs")),
x = 200, y = -0.8, inherit.aes = FALSE) +
xlab("Pseudotime Ranking") +
theme(axis.title = element_text(size = 15)) +
theme(axis.line.y = element_line()) +
scale_y_continuous(breaks = c(-1,0,1), labels = c(-1,0,1), limits = c(-1, 1)) +
geom_hline(yintercept = 0, colour = "black", linetype = "solid", size = 0.5) +
geom_vline(xintercept = n_first, colour = "darkblue", linetype = "dashed", size = 0.5) +
NULL
g1
ggsave(g1, file = "output/hep_wcors_all_clustered.pdf", height = 3.5, width = 1.6*length(unique(wcors_hep_sigLong$cluster)))
wcors_hep_sig_clusterGenes_membership = do.call(cbind, lapply(wcors_hep_sig_clusterGenes_list, function(x){
v = 1*(sort(unique(unlist(wcors_hep_sig_clusterGenes_list))) %in% x)
names(v) <- sort(unique(unlist(wcors_hep_sig_clusterGenes_list)))
return(v)
}))
jacDist = (t(wcors_hep_sig_clusterGenes_membership) %*% wcors_hep_sig_clusterGenes_membership) / (nrow(wcors_hep_sig_clusterGenes_membership) - ((1 - t(wcors_hep_sig_clusterGenes_membership)) %*% (1 - wcors_hep_sig_clusterGenes_membership)))
heatmap.2(jacDist, trace = "n", main = "Jaccard distance of genes within clusters",
density.info = "none",
key.title = "",
key.xlab = "Jaccard distance",
symm = TRUE,
revC = TRUE,
col = colorRampPalette(c("black","yellow")))
Print this out for significantly differentially correlated gene pairs, along with cluster ID.
hep_p_all$sigCluster = wcors_hep_sig_clusterGenes[rownames(hep_p_all)]
hep_p_all$startCor = wcors_hep[rownames(hep_p_all),1]
hep_p_all$branchpointCor = wcors_hep[rownames(hep_p_all),n_first]
hep_p_all$endCor = wcors_hep[rownames(hep_p_all),ncol(wcors_hep)]
hep_p_all_sorted = reshape::sort_df(hep_p_all, "fdr")
write.table(hep_p_all_sorted, file = "output/hep_p_all_sorted.tsv", sep = "\t",
row.names = FALSE, col.names = TRUE, quote = FALSE)
write.table(subset(hep_p_all_sorted, fdr < FDR_level), file = "output/hep_p_all_sorted_sig.tsv", sep = "\t",
row.names = FALSE, col.names = TRUE, quote = FALSE)
GO_list_hep = GO_list[unlist(lapply(GO_list, function(x) any(x %in% nonDE_HVG_hep)))]
if (!file.exists("output/hep_superclusterGenes_GO.Rds")) {
hep_superclusterGenes_GO = lapply(wcors_hep_sig_clusterGenes_list, function(set) {
print("testing..")
genesetGOtest(set, rownames(liver), GO_list_hep)
}
)
saveRDS(hep_superclusterGenes_GO, file = "output/hep_superclusterGenes_GO.Rds")
} else {
hep_superclusterGenes_GO = readRDS("output/hep_superclusterGenes_GO.Rds")
}
gList = lapply(hep_superclusterGenes_GO, function(pval) {
df = data.frame(term = factor(names(pval), levels = c(names(pval), "")),
pval = pval,
qval = p.adjust(pval, method = "BH"))
df$label = df$term
df$label[pval != 1] <- ""
df_sorted = reshape::sort_df(df, "pval")[1:10,]
df_sorted$term = factor(df_sorted$term, levels = rev(df_sorted$term))
g = ggplot(df_sorted, aes(x = term, y = -log10(pval), fill = qval < 0.05)) +
theme_classic() +
geom_col() +
coord_flip() +
xlab("") +
ylab(expression("-log10(P-value)")) +
# geom_hline(yintercept = -log10(0.01), colour = "red", linetype = "dashed", size = 1.2) +
scale_fill_manual(values = c("TRUE" = "red", "FALSE" = "black")) +
theme(legend.position = "none") +
NULL
return(g)
})
gList_named = sapply(seq_len(length(gList)), function(i){
g = gList[[i]] +
scale_fill_manual(values = c("TRUE" = "dimgrey", "FALSE" = "peachpuff")) +
ggtitle(paste0("Cluster ",names(gList)[i])) +
theme(axis.text.y = element_text(size = 10)) +
theme(title = element_text(hjust = 0.5)) +
scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 40)) +
NULL
ggsave(g, file = paste0("output/hep_superclusters_GO_", i, ".pdf"),
height = 4, width = 7)
return(g)
}, simplify = FALSE)
g_named = patchwork::wrap_plots(gList_named, nrow = 3)
ggsave(g_named, file = "output/hep_superclusters_GO.pdf",height = 10, width = 16)
pairs_chol_sig = pairs_chol_all[rownames(subset(chol_p_all, fdr < FDR_level)),]
dim(pairs_chol_sig)
## [1] 78 2
wcors_chol_sig = wcors_chol[rownames(pairs_chol_sig),]
wcors_chol_sig_smooth = t(apply(wcors_chol_sig,1,function(x){
loess(x ~ I(1:length(x)))$fitted
}))
hc = hclust(dist(wcors_chol_sig_smooth, method = "maximum"), method = "complete")
wcors_chol_sig_clusterGenes = cutreeDynamic(
hc,
minClusterSize = 5,
method = "tree",
deepSplit = TRUE,
useMedoids = TRUE
)
length(unique(wcors_chol_sig_clusterGenes))
## [1] 11
wcors_chol_sig_clusterGenes = cutree(hc, k = length(unique(wcors_chol_sig_clusterGenes))-1)
names(wcors_chol_sig_clusterGenes) <- hc$labels
saveRDS(wcors_chol_sig_clusterGenes, file = "output/wcors_chol_sig_clusterGenes.Rds")
length(unique(wcors_chol_sig_clusterGenes))
## [1] 10
wcors_chol_sig_clusterGenes_list = lapply(split(names(wcors_chol_sig_clusterGenes), wcors_chol_sig_clusterGenes), function(h) sort(unique(c(pairs_chol_all[h,]))))
wcors_chol_sig_mean = apply(wcors_chol_sig, 2, function(x){
tapply(x,wcors_chol_sig_clusterGenes, mean)
})
heatmap.2(wcors_chol_sig, trace = "n", Colv = FALSE,
col = colorRampPalette(c("blue","white","red"))(100),
margins = c(8,8),
RowSideColors = tol12qualitative[wcors_chol_sig_clusterGenes])
matplot(t(wcors_chol_sig_mean), type = "l", lwd = 3)
wcors_chol_sig_clusterGenes_membership = do.call(cbind, lapply(wcors_chol_sig_clusterGenes_list, function(x){
v = 1*(sort(unique(unlist(wcors_chol_sig_clusterGenes_list))) %in% x)
names(v) <- sort(unique(unlist(wcors_chol_sig_clusterGenes_list)))
return(v)
}))
jacDist = (t(wcors_chol_sig_clusterGenes_membership) %*% wcors_chol_sig_clusterGenes_membership) / (nrow(wcors_chol_sig_clusterGenes_membership) - ((1 - t(wcors_chol_sig_clusterGenes_membership)) %*% (1 - wcors_chol_sig_clusterGenes_membership)))
heatmap.2(jacDist, trace = "n", main = "Jaccard distance of genes within clusters",
density.info = "none",
key.title = "",
key.xlab = "Jaccard distance",
symm = TRUE,
revC = TRUE,
col = colorRampPalette(c("black","yellow")))
sigOrder = rev(paste0("Cluster ",rownames(wcors_chol_sig_mean)[heatmap.2(wcors_chol_sig_mean)$rowInd]))
wcors_chol_sigLong = reshape::melt(t(wcors_chol_sig))
wcors_chol_sigLong$cluster = paste0("Cluster ", wcors_chol_sig_clusterGenes[as.character(wcors_chol_sigLong$X2)])
wcors_chol_sigLong$cluster <- factor(wcors_chol_sigLong$cluster,
levels = sigOrder
)
gh = geom_hline(yintercept = 0, linetype = "dotted", colour = "red", size = 1)
g = ggplot(
wcors_chol_sigLong,
aes(x = X1, y = value, group = X2)) +
theme_minimal() +
theme(panel.grid = element_blank()) +
ylim(c(-1,1)) +
theme(axis.line.x = element_line(color="black", size = 0.5),
axis.line.y = element_line(color="black", size = 0.5)) +
xlab("Pseudotime") +
ylab("Local weighted correlation") +
NULL
numSummary_df = data.frame(cluster = sort(unique(wcors_chol_sigLong$cluster)),
num = tapply(wcors_chol_sigLong$X2, wcors_chol_sigLong$cluster, length)/max(wcors_chol_sigLong$X1))
g1 = g +
geom_line() +
theme(axis.text.x = element_blank()) +
theme(axis.line.y = element_line()) +
facet_wrap(~cluster, ncol = 10, scales = "free") +
theme(strip.text = element_text(size = 12)) +
geom_text(data = numSummary_df,
aes(label = paste0(num, " gene pairs")),
x = 200, y = -0.8, inherit.aes = FALSE) +
xlab("Pseudotime Ranking") +
theme(axis.title = element_text(size = 15)) +
theme(axis.line.y = element_line()) +
scale_y_continuous(breaks = c(-1,0,1), labels = c(-1,0,1), limits = c(-1, 1)) +
geom_hline(yintercept = 0, colour = "black", linetype = "solid", size = 0.5) +
geom_vline(xintercept = n_first, colour = "darkblue", linetype = "dashed", size = 0.5) +
NULL
g1
ggsave(g1, file = "output/chol_wcors_all_clustered.pdf", height = 3.5, width = 1.6*length(unique(wcors_chol_sigLong$cluster)))
GO_list_chol = GO_list[unlist(lapply(GO_list, function(x) any(x %in% nonDE_HVG_chol)))]
if (!file.exists("output/chol_superclusterGenes_GO.Rds")) {
chol_superclusterGenes_GO = lapply(wcors_chol_sig_clusterGenes_list, function(set) {
print("testing..")
genesetGOtest(set, rownames(liver), GO_list_chol)
}
)
saveRDS(chol_superclusterGenes_GO, file = "output/chol_superclusterGenes_GO.Rds")
} else {
chol_superclusterGenes_GO = readRDS("output/chol_superclusterGenes_GO.Rds")
}
gList = lapply(chol_superclusterGenes_GO, function(pval) {
df = data.frame(term = factor(names(pval), levels = c(names(pval), "")),
pval = pval,
qval = p.adjust(pval, method = "BH"))
df$label = df$term
df$label[pval != 1] <- ""
df_sorted = reshape::sort_df(df, "pval")[1:10,]
df_sorted$term = factor(df_sorted$term, levels = rev(df_sorted$term))
g = ggplot(df_sorted, aes(x = term, y = -log10(pval), fill = qval < 0.05)) +
theme_classic() +
geom_col() +
coord_flip() +
xlab("") +
ylab(expression("-log10(P-value)")) +
# geom_hline(yintercept = -log10(0.01), colour = "red", linetype = "dashed", size = 1.2) +
scale_fill_manual(values = c("TRUE" = "red", "FALSE" = "black")) +
theme(legend.position = "none") +
NULL
return(g)
})
gList_named = sapply(seq_len(length(gList)), function(i){
g = gList[[i]] +
scale_fill_manual(values = c("TRUE" = "dimgrey", "FALSE" = "peachpuff")) +
ggtitle(paste0("Cluster ",names(gList)[i])) +
theme(axis.text.y = element_text(size = 10)) +
theme(title = element_text(hjust = 0.5)) +
scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 40)) +
NULL
ggsave(g, file = paste0("output/chol_superclusters_GO_", i, ".pdf"),
height = 4, width = 7)
return(g)
}, simplify = FALSE)
g_named = patchwork::wrap_plots(gList_named, nrow = 3)
ggsave(g_named, file = "output/chol_superclusters_GO.pdf",height = 10, width = 16)
dv_genes_hep = names(which(hep_DV_fdr < 0.1))
dc_genes_hep = sort(unique(c(pairs_hep_sig)))
hep_genepair_type = data.frame(
genepair = hep_p_all$genepair,
gene1 = pairs_hep_all[,1],
gene2 = pairs_hep_all[,2],
sig = ifelse(hep_p_all$fdr < FDR_level, "DC", "not DC")
)
hep_genepair_type$gene1DV = hep_genepair_type$gene1 %in% dv_genes_hep
hep_genepair_type$gene2DV = hep_genepair_type$gene2 %in% dv_genes_hep
hep_genepair_type$type_sum = hep_genepair_type$gene1DV + hep_genepair_type$gene2DV
hep_genepair_type$type = "not DV - not DV"
hep_genepair_type$type[hep_genepair_type$type_sum == 1] <- "not DV - DV"
hep_genepair_type$type[hep_genepair_type$type_sum == 2] <- "DV - DV"
addmargins(table(hep_genepair_type$type, hep_genepair_type$sig))
##
## DC not DC Sum
## DV - DV 145 13221 13366
## not DV - DV 70 7638 7708
## not DV - not DV 9 1072 1081
## Sum 224 21931 22155
fisher.test(table(hep_genepair_type$type, hep_genepair_type$sig))
##
## Fisher's Exact Test for Count Data
##
## data: table(hep_genepair_type$type, hep_genepair_type$sig)
## p-value = 0.423
## alternative hypothesis: two.sided
chisq.test(table(hep_genepair_type$type, hep_genepair_type$sig))$residuals
##
## DC not DC
## DV - DV 0.84834585 -0.08573689
## not DV - DV -0.89855533 0.09081124
## not DV - not DV -0.58365099 0.05898587
# Now for cholangiocyte branch
dv_genes_chol = names(which(chol_DV_fdr < 0.1))
dc_genes_chol = sort(unique(c(pairs_chol_sig)))
chol_genepair_type = data.frame(
genepair = chol_p_all$genepair,
gene1 = pairs_chol_all[,1],
gene2 = pairs_chol_all[,2],
sig = ifelse(chol_p_all$fdr < FDR_level, "DC", "not DC")
)
chol_genepair_type$gene1DV = chol_genepair_type$gene1 %in% dv_genes_chol
chol_genepair_type$gene2DV = chol_genepair_type$gene2 %in% dv_genes_chol
chol_genepair_type$type_sum = chol_genepair_type$gene1DV + chol_genepair_type$gene2DV
chol_genepair_type$type = "not DV - not DV"
chol_genepair_type$type[chol_genepair_type$type_sum == 1] <- "not DV - DV"
chol_genepair_type$type[chol_genepair_type$type_sum == 2] <- "DV - DV"
addmargins(table(chol_genepair_type$type, chol_genepair_type$sig))
##
## DC not DC Sum
## DV - DV 49 7952 8001
## not DV - DV 28 3782 3810
## not DV - not DV 1 434 435
## Sum 78 12168 12246
fisher.test(table(chol_genepair_type$type, chol_genepair_type$sig))
##
## Fisher's Exact Test for Count Data
##
## data: table(chol_genepair_type$type, chol_genepair_type$sig)
## p-value = 0.466
## alternative hypothesis: two.sided
chisq.test(table(chol_genepair_type$type, chol_genepair_type$sig))$residuals
##
## DC not DC
## DV - DV -0.27480761 0.02200222
## not DV - DV 0.75767909 -0.06066288
## not DV - not DV -1.06377638 0.08517027
both_genes = sort(union(c(pairs_hep_all), c(pairs_chol_all)))
getStrength = function(p_all, pairs, genes) {
sumStats = sum(-log10(p_all$fdr))
strength = sapply(genes, function(gene) {
sub_p = p_all[pairs[,1] == gene | pairs[,2] == gene,]
sum((-log10(subset(sub_p, fdr < FDR_level)$fdr)/sumStats))
})
strength[is.na(strength)] <- 0
numZero = sum(strength == 0)
return(strength)
}
strength_hep = getStrength(hep_p_all, pairs_hep_all, both_genes)
strength_chol = getStrength(chol_p_all, pairs_chol_all, both_genes)
strength_A = strength_hep + strength_chol
strength_M = strength_chol - strength_hep
# random cloud of points
if (!file.exists("output/sigCounts.Rds")) {
set.seed(500)
sigCounts = replicate(10000, {
hep_p_all_fake = hep_p_all
chol_p_all_fake = chol_p_all
fake_index_hep = sample(nrow(hep_p_all))
fake_index_chol = sample(nrow(chol_p_all))
hep_p_all_fake$stat = hep_p_all$stat[fake_index_hep]
hep_p_all_fake$fdr = hep_p_all$fdr[fake_index_hep]
chol_p_all_fake$stat = chol_p_all$stat[fake_index_chol]
chol_p_all_fake$fdr = chol_p_all$fdr[fake_index_chol]
strength_hep_fake = getStrength(hep_p_all_fake, pairs_hep_all, both_genes)
strength_chol_fake = getStrength(chol_p_all_fake, pairs_chol_all, both_genes)
strength_A_fake = strength_hep_fake + strength_chol_fake
strength_M_fake = strength_chol_fake - strength_hep_fake
cos_angle = strength_M_fake / strength_A_fake
sigCount = 1*(sqrt(strength_A_fake^2 + strength_M_fake^2) >= sqrt(strength_A^2 + strength_M^2))
return(sigCount)
})
saveRDS(sigCounts, file = "output/sigCounts.Rds")
} else {
sigCounts = readRDS("output/sigCounts.Rds")
}
if (!file.exists("output/random_cos_angle.Rds")) {
set.seed(500)
random_cos_angle = replicate(10000, {
hep_p_all_fake = hep_p_all
chol_p_all_fake = chol_p_all
fake_index_hep = sample(nrow(hep_p_all))
fake_index_chol = sample(nrow(chol_p_all))
hep_p_all_fake$stat = hep_p_all$stat[fake_index_hep]
hep_p_all_fake$fdr = hep_p_all$fdr[fake_index_hep]
chol_p_all_fake$stat = chol_p_all$stat[fake_index_chol]
chol_p_all_fake$fdr = chol_p_all$fdr[fake_index_chol]
strength_hep_fake = getStrength(hep_p_all_fake, pairs_hep_all, both_genes)
strength_chol_fake = getStrength(chol_p_all_fake, pairs_chol_all, both_genes)
strength_A_fake = strength_hep_fake + strength_chol_fake
strength_M_fake = strength_chol_fake - strength_hep_fake
cos_angle = strength_M_fake / strength_A_fake
return(cos_angle)
})
saveRDS(random_cos_angle, file = "output/random_cos_angle.Rds")
} else {
random_cos_angle = readRDS("output/random_cos_angle.Rds")
}
MA_pval = rowMeans(sigCounts)
cos_angle = strength_M / strength_A
dim(random_cos_angle)
## [1] 257 10000
angle_pval = sapply(names(cos_angle), function(gene) {
mean(abs(random_cos_angle[gene,]) > abs(cos_angle[gene]), na.rm = TRUE)
})
# small p-value means branch-specific
table(angle_pval > 0.05 & angle_pval < 0.95, p.adjust(MA_pval, method = "BH") < 0.05)
##
## FALSE TRUE
## FALSE 132 14
## TRUE 23 5
names(which(angle_pval > 0.05 & angle_pval < 0.95 & p.adjust(MA_pval, method = "BH") < 0.05))
## [1] "Bex1" "Cdt1" "Fth1" "Gstm1" "Ivns1abp"
diffGenes = names(which(angle_pval > 0.05 & angle_pval < 0.95 & p.adjust(MA_pval, method = "BH") < 0.05))
strength_df = data.frame(
gene = both_genes,
strength_A = strength_A,
strength_M = strength_M,
MA_pval = MA_pval,
MA_fdr = p.adjust(MA_pval, method = "BH"),
angle_pval = angle_pval,
angle_fdr = p.adjust(angle_pval, method = "BH")
)
strength_df$col = "none"
strength_df$col[strength_df$MA_fdr < 0.05 & strength_df$angle_fdr > 0.05] <- "red"
strength_df$col[strength_df$MA_fdr < 0.05 & strength_df$angle_fdr < 0.05] <- "black"
g = ggplot(strength_df, aes(x = strength_A, y = -strength_M)) +
# geom_point(colour = "grey") +
geom_point(aes(colour = col)) +
theme_minimal() +
geom_text_repel(aes(label = gene, colour = col),
data = subset(strength_df, col != "none"),
fontface = "italic",
size = 5) +
geom_hline(yintercept = 0) +
theme(panel.grid = element_blank()) +
theme(axis.title = element_text(size = 13)) +
xlab("Gene strength sum") +
ylab("Gene strength difference (Hepatocyte - Cholangiocyte)") +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend), arrow = arrow(),
data = data.frame(x = c(0,0), xend = c(0,0),
y = c(0,0), yend = c(0.01,-0.01))) +
geom_text(aes(x = x, y = y, label = label),
data = data.frame(
x = c(0.0017,0.002),
y = c(0.01, -0.01),
label = c("Hepatocyte\nbranch","Cholangiocyte\nbranch")
)) +
# theme(axis.text = element_blank()) +
theme(legend.position = "none") +
scale_colour_manual(values = c("black" = "black", "red" = "red", "none" = "grey"),
labels = c("black" = "Branch specific",
"red" = "Common to either branch")) +
labs(colour = "") +
geom_text(data = data.frame(x = rep(0.008,2), y = -0.011 + c(0,2e-3),
label = c("Branch specific","Common to \neither branch"), col = c("black","red")),
inherit.aes = FALSE,
aes(x = x, y = y, label = label, colour = col),
hjust = 0.5,
fontface = "bold") +
NULL
g
ggsave(g, file = "output/hep_chol_branch_comparison.pdf", height = 5, width = 5)
scales::show_col(tableau_color_pal("Tableau 10")(3))
branchcols = tableau_color_pal("Tableau 10")(3)
names(branchcols) <- c("Hepatocyte","Cholangiocyte","Hepatoblast")
df = data.frame(gene1 = liver_branch_hep["Cdt1",],
gene2 = liver_branch_hep["Top2a",],
rank = rank(liver_pseudotime_hep))
W_hep_weight = W_hep
rownames(W_hep_weight) <- paste0("Weight_", 1:nrow(W_hep_weight))
df = cbind(df, t(W_hep_weight))
df$pseudotime <- liver_pseudotime_hep
df$mean_gene1 <- apply(W_hep, 1, function(w) weightedMean(liver_branch_hep["Cdt1",], w))
df$mean_gene2 <- apply(W_hep, 1, function(w) weightedMean(liver_branch_hep["Top2a",], w))
df$var_gene1 <- hep_DV_wVars["Cdt1",]
df$var_gene2 <- hep_DV_wVars["Top2a",]
df$sd_gene1 <- sqrt(df$var_gene1)
df$sd_gene2 <- sqrt(df$var_gene2)
df$lower_gene1 = df$mean_gene1 - df$sd_gene1
df$upper_gene1 = df$mean_gene1 + df$sd_gene1
df$lower_gene2 = df$mean_gene2 - df$sd_gene2
df$upper_gene2 = df$mean_gene2 + df$sd_gene2
g1 = ggplot(df, aes(x = rank, y = gene1, colour = rank)) +
geom_point() +
geom_ribbon(aes(ymin = lower_gene1, ymax = upper_gene1), alpha = 0.1, colour = NA) +
geom_line(aes(y = mean_gene1), size = 2) +
theme_minimal() +
theme(legend.position = "none") +
theme(panel.grid = element_blank()) +
theme(axis.title = element_text(size = 15)) +
xlab("") +
ylab(expression(paste(italic("Cdt1"), " Expression"))) +
ggtitle("") +
theme(plot.title = element_text(hjust = 0.5, size = 20)) +
scale_colour_gradient(low = branchcols["Hepatoblast"],
high = branchcols["Hepatocyte"]) +
theme(plot.margin = unit(c(0,0,0,0), "in")) +
theme(axis.line.x = element_line(color="black", size = 0.5),
axis.line.y = element_line(color="black", size = 0.5)) +
NULL
g1
g2 = ggplot(df, aes(x = rank, y = gene2, colour = rank)) +
geom_point() +
geom_ribbon(aes(ymin = lower_gene2, ymax = upper_gene2), alpha = 0.1, colour = NA) +
geom_line(aes(y = mean_gene2), size = 2) +
theme_minimal() +
theme(legend.position = "none") +
theme(panel.grid = element_blank()) +
theme(axis.title = element_text(size = 15)) +
xlab("Pseudotime Ranking") +
ylab(expression(paste(italic("Top2a"), " Expression"))) +
ggtitle("") +
theme(plot.title = element_text(hjust = 0.5, size = 5)) +
scale_colour_gradient(low = branchcols["Hepatoblast"],
high = branchcols["Hepatocyte"]) +
theme(plot.margin = unit(c(0,0,0,0.2), "in")) +
theme(axis.line.x = element_line(color="black", size = 0.5),
axis.line.y = element_line(color="black", size = 0.5)) +
NULL
g2
g = g1 + g2 + plot_layout(ncol = 2)
ggsave(g, file = "output/Cdt1_Top2a_ribbonPlot.pdf",height = 2.3, width = 12)
gList = sapply(paste0("Weight_", round(seq(1,nrow(W_hep_weight), length.out = 5))), function(weight) {
ggplot(df, aes(x = gene1, y = gene2)) +
geom_point(aes(alpha = get(weight),
size = get(weight),
colour = rank),
pch = 16) + # , size = get(weight)
theme_classic() +
theme(panel.grid = element_blank()) +
theme(axis.line.x = element_line(color="black", size = 1),
axis.line.y = element_line(color="black", size = 1)) +
theme(legend.position = "none") +
geom_density2d(data = subset(df, get(weight) > 0), colour = "black",
size = 0.5, linetype = "solid",
bins = 5) +
xlab("") +
ylab("") +
xlim(c(0,10.2)) +
scale_colour_gradient(low = branchcols["Hepatoblast"],
high = branchcols["Hepatocyte"]) +
scale_size_continuous(range = c(1, 4)) +
scale_alpha_continuous(range = c(0.5, 1)) +
theme(axis.text = element_text(size = 15)) +
theme(axis.title = element_text(size = 20)) +
xlab(ifelse(weight == "Weight_1", expression(italic("Cdt1")), "")) +
ylab(ifelse(weight == "Weight_1", expression(italic("Top2a")), "")) +
ggtitle(
paste0("Local cor: ",
round(weightedSpearman(df$gene1, df$gene2, w = df[,weight]), 2))
) +
theme(plot.title = element_text(size = 20)) +
NULL
}, simplify = FALSE)
g = wrap_plots(gList, nrow = 1)
g
sapply(names(gList), function(n){
ggsave(gList[[n]], file = paste0("output/Top2a_Cdt1_density_scatterplots_", n,".pdf"),height = 3.6, width = 3.6)
})
## $Weight_1
## NULL
##
## $Weight_103
## NULL
##
## $Weight_204
## NULL
##
## $Weight_306
## NULL
##
## $Weight_408
## NULL
ggsave(g, file = "output/Top2a_Cdt1_density_scatterplots.pdf",height = 3.5, width = 14)
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin18.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS/LAPACK: /usr/local/Cellar/openblas/0.3.6_1/lib/libopenblasp-r0.3.6.dylib
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] splines grid stats4 parallel stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] mgcv_1.8-31 nlme_3.1-144
## [3] energy_1.7-7 minerva_1.5.8
## [5] UpSetR_1.4.0 GGally_1.4.0
## [7] circlize_0.4.8 corrplot_0.84
## [9] scMerge_1.2.0 M3Drop_1.12.0
## [11] numDeriv_2016.8-1.1 ggridges_0.5.2
## [13] ggrepel_0.8.1 plotly_4.9.2
## [15] gtools_3.8.1 igraph_1.2.4.2
## [17] ggthemes_4.2.0 monocle_2.14.0
## [19] DDRTree_0.1.5 irlba_2.3.3
## [21] VGAM_1.1-2 Matrix_1.2-18
## [23] cowplot_1.0.0 ggpubr_0.2.5
## [25] magrittr_1.5 patchwork_1.0.0
## [27] ggforce_0.3.1 stringr_1.4.0
## [29] org.Mm.eg.db_3.10.0 GO.db_3.10.0
## [31] AnnotationDbi_1.48.0 ComplexHeatmap_2.2.0
## [33] dynamicTreeCut_1.63-1 scattermore_0.4
## [35] reshape_0.8.8 scran_1.14.6
## [37] scater_1.14.6 SingleCellExperiment_1.8.0
## [39] SummarizedExperiment_1.16.1 DelayedArray_0.12.2
## [41] BiocParallel_1.20.1 matrixStats_0.55.0
## [43] Biobase_2.46.0 GenomicRanges_1.38.0
## [45] GenomeInfoDb_1.22.0 IRanges_2.20.2
## [47] S4Vectors_0.24.3 BiocGenerics_0.32.0
## [49] gplots_3.0.1.2 ggplot2_3.2.1
## [51] DCARS_0.3.5
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.0.0 RSQLite_2.2.0 htmlwidgets_1.5.1
## [4] combinat_0.0-8 docopt_0.6.1 Rtsne_0.15
## [7] munsell_0.5.0 codetools_0.2-16 statmod_1.4.34
## [10] withr_2.1.2 colorspace_1.4-1 fastICA_1.2-2
## [13] knitr_1.28 rstudioapi_0.11 ggsignif_0.6.0
## [16] labeling_0.3 slam_0.1-47 bbmle_1.0.23.1
## [19] GenomeInfoDbData_1.2.2 polyclip_1.10-0 bit64_0.9-7
## [22] farver_2.0.3 pheatmap_1.0.12 vctrs_0.2.2
## [25] xfun_0.12 R6_2.4.1 ggbeeswarm_0.6.0
## [28] clue_0.3-57 rsvd_1.0.3 locfit_1.5-9.1
## [31] bitops_1.0-6 assertthat_0.2.1 scales_1.1.0
## [34] nnet_7.3-12 startupmsg_0.9.6 beeswarm_0.2.3
## [37] gtable_0.3.0 rlang_0.4.4 GlobalOptions_0.1.1
## [40] lazyeval_0.2.2 acepack_1.4.1 checkmate_2.0.0
## [43] yaml_2.2.1 reshape2_1.4.3 backports_1.1.5
## [46] Hmisc_4.3-1 tools_3.6.1 RColorBrewer_1.1-2
## [49] proxy_0.4-23 Rcpp_1.0.3 plyr_1.8.5
## [52] base64enc_0.1-3 zlibbioc_1.32.0 purrr_0.3.3
## [55] RCurl_1.98-1.1 densityClust_0.3 rpart_4.1-15
## [58] reldist_1.6-6 GetoptLong_0.1.8 viridis_0.5.1
## [61] sfsmisc_1.1-5 cluster_2.1.0 data.table_1.12.8
## [64] RANN_2.6.1 mvtnorm_1.0-12 distr_2.8.0
## [67] evaluate_0.14 jpeg_0.1-8.1 sparsesvd_0.2
## [70] gridExtra_2.3 shape_1.4.4 HSMMSingleCell_1.6.0
## [73] compiler_3.6.1 bdsmatrix_1.3-4 tibble_2.1.3
## [76] KernSmooth_2.23-16 crayon_1.3.4 htmltools_0.4.0
## [79] Formula_1.2-3 tidyr_1.0.2 DBI_1.1.0
## [82] tweenr_1.0.1 MASS_7.3-51.5 boot_1.3-24
## [85] gdata_2.18.0 pkgconfig_2.0.3 foreign_0.8-75
## [88] vipor_0.4.5 dqrng_0.2.1 XVector_0.26.0
## [91] ruv_0.9.7.1 digest_0.6.24 rmarkdown_2.1
## [94] htmlTable_1.13.3 edgeR_3.28.0 DelayedMatrixStats_1.8.0
## [97] rjson_0.2.20 lifecycle_0.1.0 jsonlite_1.6.1
## [100] BiocNeighbors_1.4.1 viridisLite_0.3.0 limma_3.42.2
## [103] pillar_1.4.3 lattice_0.20-40 httr_1.4.1
## [106] survival_3.1-8 glue_1.3.1 qlcMatrix_0.9.7
## [109] FNN_1.1.3 png_0.1-7 bit_1.1-15.2
## [112] stringi_1.4.6 blob_1.2.1 BiocSingular_1.2.2
## [115] latticeExtra_0.6-29 caTools_1.18.0 memoise_1.1.0
## [118] dplyr_0.8.4